From 0b07b6485ca13d08fd15e0b372f0baff61ff5a66 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sun, 26 Apr 2020 13:50:52 +0800 Subject: [PATCH 01/52] restore: add pipelined CreateTable. --- pkg/restore/client.go | 82 +++++++++++++---- pkg/restore/pipeline_items.go | 12 +++ tests/br_full_index/workload | 2 +- tests/config/tidb.toml | 162 ++++++++++++++++++++++++++++++++-- 4 files changed, 237 insertions(+), 21 deletions(-) create mode 100644 pkg/restore/pipeline_items.go diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 2b685e599..bee9f39df 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -324,27 +324,79 @@ func (rc *Client) CreateTables( Data: make([]*import_sstpb.RewriteRule, 0), } newTables := make([]*model.TableInfo, 0, len(tables)) - for _, table := range tables { - if rc.IsSkipCreateSQL() { - log.Info("skip create table and alter autoIncID", zap.Stringer("table", table.Info.Name)) - } else { - err := rc.db.CreateTable(rc.ctx, table) - if err != nil { - return nil, nil, err - } - } - newTableInfo, err := rc.GetTableSchema(dom, table.Db.Name, table.Info.Name) - if err != nil { - return nil, nil, err - } - rules := GetRewriteRules(newTableInfo, table.Info, newTS) + dataCh, errCh := rc.GoCreateTables(context.TODO(), dom, tables, newTS) + for et := range dataCh { + rules := et.RewriteRule rewriteRules.Table = append(rewriteRules.Table, rules.Table...) rewriteRules.Data = append(rewriteRules.Data, rules.Data...) - newTables = append(newTables, newTableInfo) + newTables = append(newTables, et.Table) + } + if err, ok := <-errCh; ok { + return nil, nil, err } return rewriteRules, newTables, nil } +func (rc *Client) createTable(dom *domain.Domain, table *utils.Table, newTS uint64) (CreatedTable, error) { + if rc.IsSkipCreateSQL() { + log.Info("skip create table and alter autoIncID", zap.Stringer("table", table.Info.Name)) + } else { + err := rc.db.CreateTable(rc.ctx, table) + if err != nil { + return CreatedTable{}, err + } + } + newTableInfo, err := rc.GetTableSchema(dom, table.Db.Name, table.Info.Name) + if err != nil { + return CreatedTable{}, err + } + rules := GetRewriteRules(newTableInfo, table.Info, newTS) + et := CreatedTable{ + RewriteRule: rules, + Table: newTableInfo, + } + return et, nil +} + +// GoCreateTables create tables, and generate their information. +func (rc *Client) GoCreateTables( + ctx context.Context, + dom *domain.Domain, + tables []*utils.Table, + newTS uint64, + ) (<-chan CreatedTable, <-chan error) { + // Could we have a smaller size of tables? 
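+	// A buffer of len(tables) means every created table can be queued without
+	// blocking; a smaller buffer would only add back-pressure here, it would not
+	// change the results.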
+ outCh := make(chan CreatedTable, len(tables)) + errCh := make(chan error, 1) + go func() { + defer close(outCh) + defer close(errCh) + for _, table := range tables { + select { + case <-ctx.Done(): + log.Error("create table canceled", + zap.Error(ctx.Err()), + zap.Stringer("table", table.Info.Name), + zap.Stringer("database", table.Db.Name)) + errCh <- ctx.Err() + return + default: + } + rt, err := rc.createTable(dom, table, newTS) + if err != nil { + log.Error("create table failed", + zap.Error(err), + zap.Stringer("table", table.Info.Name), + zap.Stringer("database", table.Db.Name)) + errCh <- err + return + } + outCh <- rt + } + }() + return outCh, errCh +} + // RemoveTiFlashReplica removes all the tiflash replicas of a table // TODO: remove this after tiflash supports restore func (rc *Client) RemoveTiFlashReplica( diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go new file mode 100644 index 000000000..c1505c045 --- /dev/null +++ b/pkg/restore/pipeline_items.go @@ -0,0 +1,12 @@ +package restore + +import ( + "github.com/pingcap/parser/model" +) + +// CreatedTable is a table is created on restore process, +// but not yet filled by data. +type CreatedTable struct { + RewriteRule *RewriteRules + Table *model.TableInfo +} \ No newline at end of file diff --git a/tests/br_full_index/workload b/tests/br_full_index/workload index 448ca3c1a..84335df96 100644 --- a/tests/br_full_index/workload +++ b/tests/br_full_index/workload @@ -1,4 +1,4 @@ -recordcount=1000 +recordcount=100 operationcount=0 workload=core diff --git a/tests/config/tidb.toml b/tests/config/tidb.toml index 301aabd94..b432842b1 100644 --- a/tests/config/tidb.toml +++ b/tests/config/tidb.toml @@ -1,6 +1,158 @@ -# config of tidb - -# Schema lease duration -# There are lot of ddl in the tests, setting this -# to 360s to test whther BR is gracefully shutdown. 
+host = "0.0.0.0" +advertise-address = "0.0.0.0" +port = 4000 +cors = "" +store = "tikv" +path = "127.0.0.1:2379" +socket = "" lease = "360s" +run-ddl = true +split-table = true +token-limit = 1000 +oom-use-tmp-storage = true +tmp-storage-path = "/var/folders/7g/f3yqsrhn5m75k7h_810s96_m0000gn/T/tidb/tmp-storage" +oom-action = "cancel" +mem-quota-query = 1073741824 +enable-streaming = false +enable-batch-dml = false +lower-case-table-names = 2 +server-version = "" +compatible-kill-query = false +check-mb4-value-in-utf8 = true +max-index-length = 3072 +alter-primary-key = false +treat-old-version-utf8-as-utf8mb4 = true +enable-table-lock = false +delay-clean-table-lock = 0 +split-region-max-num = 1000 +repair-mode = false +repair-table-list = [] +max-server-connections = 4096 +new_collations_enabled_on_first_bootstrap = false +enable-dynamic-config = true + +[txn-local-latches] + enabled = false + capacity = 2048000 + +[log] + level = "info" + format = "text" + disable-timestamp = "" + enable-timestamp = "" + disable-error-stack = "" + enable-error-stack = "" + enable-slow-log = true + slow-query-file = "tidb-slow.log" + slow-threshold = 300 + expensive-threshold = 10000 + query-log-max-len = 4096 + record-plan-in-slow-log = 1 + [log.file] + filename = "/tmp/backup_restore_test/tidb.log" + max-size = 300 + max-days = 0 + max-backups = 0 + +[security] + skip-grant-table = false + ssl-ca = "" + ssl-cert = "" + ssl-key = "" + require-secure-transport = false + cluster-ssl-ca = "" + cluster-ssl-cert = "" + cluster-ssl-key = "" + +[status] + status-host = "0.0.0.0" + metrics-addr = "" + status-port = 10080 + metrics-interval = 15 + report-status = true + record-db-qps = false + +[performance] + max-procs = 0 + max-memory = 0 + stats-lease = "3s" + stmt-count-limit = 5000 + feedback-probability = 0.05 + query-feedback-limit = 1024 + pseudo-estimate-ratio = 0.8 + force-priority = "NO_PRIORITY" + bind-info-lease = "3s" + txn-total-size-limit = 104857600 + tcp-keep-alive = true + cross-join = true + run-auto-analyze = true + +[prepared-plan-cache] + enabled = false + capacity = 100 + memory-guard-ratio = 0.1 + +[opentracing] + enable = false + rpc-metrics = false + [opentracing.sampler] + type = "const" + param = 1.0 + sampling-server-url = "" + max-operations = 0 + sampling-refresh-interval = 0 + [opentracing.reporter] + queue-size = 0 + buffer-flush-interval = 0 + log-spans = false + local-agent-host-port = "" + +[proxy-protocol] + networks = "" + header-timeout = 5 + +[tikv-client] + grpc-connection-count = 4 + grpc-keepalive-time = 10 + grpc-keepalive-timeout = 3 + commit-timeout = "41s" + max-batch-size = 128 + overload-threshold = 200 + max-batch-wait-time = 0 + batch-wait-size = 8 + enable-chunk-rpc = true + region-cache-ttl = 600 + store-limit = 0 + [tikv-client.copr-cache] + enabled = false + capacity-mb = 0.0 + admission-max-result-mb = 0.0 + admission-min-process-ms = 0 + +[binlog] + enable = false + ignore-error = false + write-timeout = "15s" + binlog-socket = "" + strategy = "range" + +[plugin] + dir = "" + load = "" + +[pessimistic-txn] + enable = true + max-retry-count = 256 + +[stmt-summary] + enable = true + max-stmt-count = 200 + max-sql-length = 4096 + refresh-interval = 1800 + history-size = 24 + +[isolation-read] + engines = ["tikv", "tiflash", "tidb"] + +[experimental] + allow-auto-random = false From 061b669ac03880b3b7e4f92eee4dbf4cd2e76c44 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sun, 26 Apr 2020 17:36:09 +0800 Subject: [PATCH 02/52] restore: add pipelined 
ValidateFileRanges. --- pkg/restore/client.go | 17 ++++-- pkg/restore/pipeline_items.go | 8 +++ pkg/restore/util.go | 98 ++++++++++++++++++++++++++++++----- pkg/restore/util_test.go | 26 ++++++++++ pkg/task/restore.go | 40 ++++++++++---- 5 files changed, 160 insertions(+), 29 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index bee9f39df..6c421abdb 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -324,7 +324,8 @@ func (rc *Client) CreateTables( Data: make([]*import_sstpb.RewriteRule, 0), } newTables := make([]*model.TableInfo, 0, len(tables)) - dataCh, errCh := rc.GoCreateTables(context.TODO(), dom, tables, newTS) + errCh := make(chan error, 1) + dataCh := rc.GoCreateTables(context.TODO(), dom, tables, newTS, errCh) for et := range dataCh { rules := et.RewriteRule rewriteRules.Table = append(rewriteRules.Table, rules.Table...) @@ -354,6 +355,7 @@ func (rc *Client) createTable(dom *domain.Domain, table *utils.Table, newTS uint et := CreatedTable{ RewriteRule: rules, Table: newTableInfo, + OldTable: table.Info, } return et, nil } @@ -364,13 +366,14 @@ func (rc *Client) GoCreateTables( dom *domain.Domain, tables []*utils.Table, newTS uint64, - ) (<-chan CreatedTable, <-chan error) { + errCh chan<- error, + ) <-chan CreatedTable { // Could we have a smaller size of tables? outCh := make(chan CreatedTable, len(tables)) - errCh := make(chan error, 1) go func() { defer close(outCh) - defer close(errCh) + defer log.Info("all tables created") + for _, table := range tables { select { case <-ctx.Done(): @@ -391,10 +394,14 @@ func (rc *Client) GoCreateTables( errCh <- err return } + log.Debug("table created and send to next", + zap.Int("output chan size", len(outCh)), + zap.Stringer("table", table.Info.Name), + zap.Stringer("database", table.Db.Name),) outCh <- rt } }() - return outCh, errCh + return outCh } // RemoveTiFlashReplica removes all the tiflash replicas of a table diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index c1505c045..f5cc185ac 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -1,6 +1,7 @@ package restore import ( + "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/parser/model" ) @@ -9,4 +10,11 @@ import ( type CreatedTable struct { RewriteRule *RewriteRules Table *model.TableInfo + OldTable *model.TableInfo +} + +type TableWithRange struct { + CreatedTable + + Range []rtree.Range } \ No newline at end of file diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 70678622e..47375a66f 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -132,29 +132,99 @@ func ValidateFileRanges( for _, file := range files { // We skips all default cf files because we don't range overlap. 
if !fileAppended[file.GetName()] && strings.Contains(file.GetName(), "write") { - err := ValidateFileRewriteRule(file, rewriteRules) + rng, err := validateAndGetFileRange(file, rewriteRules) if err != nil { return nil, err } - startID := tablecodec.DecodeTableID(file.GetStartKey()) - endID := tablecodec.DecodeTableID(file.GetEndKey()) - if startID != endID { - log.Error("table ids dont match", - zap.Int64("startID", startID), - zap.Int64("endID", endID), - zap.Stringer("file", file)) - return nil, errors.New("table ids dont match") - } - ranges = append(ranges, rtree.Range{ - StartKey: file.GetStartKey(), - EndKey: file.GetEndKey(), - }) + ranges = append(ranges, rng) fileAppended[file.GetName()] = true } } return ranges, nil } +// MapTableToFiles makes a map that mapping table ID to its backup files. +// aware that one file can and only can hold one table. +func MapTableToFiles(files []*backup.File) map[int64][]*backup.File { + result := map[int64][]*backup.File{ } + for _, file := range files { + tableID := tablecodec.DecodeTableID(file.GetStartKey()) + tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) + if tableID != tableEndID { + log.Warn("key range spread between many files.", + zap.String("file name", file.Name), + zap.Binary("start key", file.GetStartKey()), + zap.Binary("end key", file.GetEndKey())) + } + if tableID == 0 { + log.Warn("invalid table key of file", + zap.String("file name", file.Name), + zap.Binary("start key", file.GetStartKey()), + zap.Binary("end key", file.GetEndKey())) + } + result[tableID] = append(result[tableID], file) + } + return result +} + +func GoValidateFileRanges( + ctx context.Context, + tableStream <-chan CreatedTable, + fileOfTable map[int64][]*backup.File, + errCh chan<- error, + ) <-chan TableWithRange { + outCh := make(chan TableWithRange, len(fileOfTable)) + go func() { + defer close(outCh) + defer log.Info("all range generated") + + for t := range tableStream { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + return + default: + } + files := fileOfTable[t.OldTable.ID] + ranges, err := ValidateFileRanges(files, t.RewriteRule) + if err != nil { + errCh <- err + return + } + tableWithRange := TableWithRange{ + CreatedTable: t, + Range: AttachFilesToRanges(files, ranges), + } + log.Debug("sending range info", + zap.Stringer("table", t.Table.Name), + zap.Int("files", len(files)), + zap.Int("range size", len(ranges)), + zap.Int("output channel size", len(outCh))) + outCh <- tableWithRange + } + }() + return outCh +} + +// validateAndGetFileRange validates a file, if success, return the key range of this file. +func validateAndGetFileRange(file *backup.File, rules *RewriteRules) (rtree.Range, error) { + err := ValidateFileRewriteRule(file, rules) + if err != nil { + return rtree.Range{}, err + } + startID := tablecodec.DecodeTableID(file.GetStartKey()) + endID := tablecodec.DecodeTableID(file.GetEndKey()) + if startID != endID { + log.Error("table ids dont match", + zap.Int64("startID", startID), + zap.Int64("endID", endID), + zap.Stringer("file", file)) + return rtree.Range{}, errors.New("table ids dont match") + } + r := rtree.Range{ StartKey: file.GetStartKey(), EndKey: file.GetEndKey() } + return r, nil +} + // AttachFilesToRanges attach files to ranges. // Panic if range is overlapped or no range for files. 
func AttachFilesToRanges( diff --git a/pkg/restore/util_test.go b/pkg/restore/util_test.go index d1a738fdb..edab30c51 100644 --- a/pkg/restore/util_test.go +++ b/pkg/restore/util_test.go @@ -38,6 +38,32 @@ func (s *testRestoreUtilSuite) TestGetSSTMetaFromFile(c *C) { c.Assert(string(sstMeta.GetRange().GetEnd()), Equals, "t2\xff") } +func (s *testRestoreUtilSuite) TestMapTableToFiles(c *C) { + filesOfTable1 := []*backup.File { + {Name: "table1-1.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1)}, + {Name: "table1-2.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1)}, + {Name: "table1-3/sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1)},} + filesOfTable2 := []*backup.File { + {Name: "table2-1.sst", + StartKey: tablecodec.EncodeTablePrefix(2), + EndKey: tablecodec.EncodeTablePrefix(2)}, + {Name: "table2-2.sst", + StartKey: tablecodec.EncodeTablePrefix(2), + EndKey: tablecodec.EncodeTablePrefix(2)}, + } + + result := MapTableToFiles(append(filesOfTable2, filesOfTable1...)) + + c.Assert(result[1], DeepEquals, filesOfTable1) + c.Assert(result[2], DeepEquals, filesOfTable2) +} + func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { rules := &RewriteRules{ Table: []*import_sstpb.RewriteRule{&import_sstpb.RewriteRule{ diff --git a/pkg/task/restore.go b/pkg/task/restore.go index a21010358..e8163bb43 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -4,9 +4,9 @@ package task import ( "context" - "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" + "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/log" "github.com/pingcap/parser/model" "github.com/pingcap/tidb-tools/pkg/filter" @@ -174,15 +174,40 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } - rewriteRules, newTables, err := client.CreateTables(mgr.GetDomain(), tables, newTS) - if err != nil { - return err - } + errCh := make(chan error, 2) + tableStream := client.GoCreateTables(ctx, mgr.GetDomain(), tables, newTS, errCh) placementRules, err := client.GetPlacementRules(cfg.PD) if err != nil { return err } + tableFileMap := restore.MapTableToFiles(files) + rangeStream := restore.GoValidateFileRanges(ctx, tableStream, tableFileMap, errCh) + + var newTables []*model.TableInfo + var ranges []rtree.Range + rewriteRules := &restore.RewriteRules{ + Table: []*import_sstpb.RewriteRule{}, + Data: []*import_sstpb.RewriteRule{}, + } + for ct := range rangeStream { + newTables = append(newTables, ct.Table) + ranges = append(ranges, ct.Range...) + rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) + rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) 
+ } + log.Debug("Go back to sequential path.", + zap.Int("files", len(files)), + zap.Int("new tables", len(newTables)), + zap.Int("ranges", len(ranges))) + select { + case err, ok := <-errCh: + if ok { + return err + } + default: + } + err = client.RemoveTiFlashReplica(tables, newTables, placementRules) if err != nil { return err @@ -192,17 +217,12 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf _ = client.RecoverTiFlashReplica(tables) }() - ranges, err := restore.ValidateFileRanges(files, rewriteRules) - if err != nil { - return err - } summary.CollectInt("restore ranges", len(ranges)) if err = splitPrepareWork(ctx, client, newTables); err != nil { return err } - ranges = restore.AttachFilesToRanges(files, ranges) // Redirect to log if there is no log file to avoid unreadable output. updateCh := g.StartProgress( From d711deddf300cd0ce4dfc0d40416d92c0ec4e0b8 Mon Sep 17 00:00:00 2001 From: Hillium Date: Mon, 27 Apr 2020 12:54:32 +0800 Subject: [PATCH 03/52] restore: pipelining restore process. --- pkg/restore/client.go | 51 +++++++-- pkg/restore/pipeline_items.go | 10 +- pkg/restore/util.go | 23 +++- pkg/task/restore.go | 193 +++++++++++++++++++++------------- 4 files changed, 186 insertions(+), 91 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 6c421abdb..f6be60801 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -332,8 +332,12 @@ func (rc *Client) CreateTables( rewriteRules.Data = append(rewriteRules.Data, rules.Data...) newTables = append(newTables, et.Table) } - if err, ok := <-errCh; ok { - return nil, nil, err + select { + case err, ok := <-errCh: + if ok { + return nil, nil, err + } + default: } return rewriteRules, newTables, nil } @@ -354,8 +358,8 @@ func (rc *Client) createTable(dom *domain.Domain, table *utils.Table, newTS uint rules := GetRewriteRules(newTableInfo, table.Info, newTS) et := CreatedTable{ RewriteRule: rules, - Table: newTableInfo, - OldTable: table.Info, + Table: newTableInfo, + OldTable: table, } return et, nil } @@ -367,7 +371,7 @@ func (rc *Client) GoCreateTables( tables []*utils.Table, newTS uint64, errCh chan<- error, - ) <-chan CreatedTable { +) <-chan CreatedTable { // Could we have a smaller size of tables? outCh := make(chan CreatedTable, len(tables)) go func() { @@ -397,7 +401,7 @@ func (rc *Client) GoCreateTables( log.Debug("table created and send to next", zap.Int("output chan size", len(outCh)), zap.Stringer("table", table.Info.Name), - zap.Stringer("database", table.Db.Name),) + zap.Stringer("database", table.Db.Name)) outCh <- rt } }() @@ -461,15 +465,40 @@ func (rc *Client) RemoveTiFlashReplica( return nil } +// RemoveTiFlashOfTable removes TiFlash replica of some table, +// returns the removed count of TiFlash nodes. +// TODO: save the removed TiFlash information into disk. +// TODO: remove this after tiflash supports restore. +func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule) (int, error) { + if rule := utils.SearchPlacementRule(table.Table.ID, rule, placement.Learner); rule != nil { + if rule.Count > 0 { + err := rc.db.AlterTiflashReplica(rc.ctx, table.OldTable, 0) + if err != nil { + return 0, errors.Trace(err) + } + } + } + return 0, nil +} + +// RecoverTiFlashOfTable recoveres TiFlash replica of some table. 
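+// The replica count to restore is the one recorded for the table in the backup
+// (table.TiFlashReplicas).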
+// TODO: remove this after tiflash supports restore +func (rc *Client) RecoverTiFlashOfTable(table *utils.Table) error { + if table.TiFlashReplicas > 0 { + err := rc.db.AlterTiflashReplica(rc.ctx, table, table.TiFlashReplicas) + if err != nil { + return errors.Trace(err) + } + } + return nil +} + // RecoverTiFlashReplica recovers all the tiflash replicas of a table // TODO: remove this after tiflash supports restore func (rc *Client) RecoverTiFlashReplica(tables []*utils.Table) error { for _, table := range tables { - if table.TiFlashReplicas > 0 { - err := rc.db.AlterTiflashReplica(rc.ctx, table, table.TiFlashReplicas) - if err != nil { - return errors.Trace(err) - } + if err := rc.RecoverTiFlashOfTable(table); err != nil { + return err } } return nil diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index f5cc185ac..681bf1ab1 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -1,7 +1,10 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + package restore import ( "github.com/pingcap/br/pkg/rtree" + "github.com/pingcap/br/pkg/utils" "github.com/pingcap/parser/model" ) @@ -9,12 +12,13 @@ import ( // but not yet filled by data. type CreatedTable struct { RewriteRule *RewriteRules - Table *model.TableInfo - OldTable *model.TableInfo + Table *model.TableInfo + OldTable *utils.Table } +// TableWithRange is a CreatedTable that has been bind to some of key ranges. type TableWithRange struct { CreatedTable Range []rtree.Range -} \ No newline at end of file +} diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 47375a66f..8d925caef 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -121,6 +121,19 @@ func getSSTMetaFromFile( } } +// EstimateRangeSize estimates the total range count by file. +func EstimateRangeSize(files []*backup.File) int { + result := 0 + visitedSet := map[string]bool{} + for _, f := range files { + if !visitedSet[f.GetName()] && strings.Contains(f.GetName(), "write") { + result++ + visitedSet[f.GetName()] = true + } + } + return result +} + // ValidateFileRanges checks and returns the ranges of the files. func ValidateFileRanges( files []*backup.File, @@ -146,9 +159,9 @@ func ValidateFileRanges( // MapTableToFiles makes a map that mapping table ID to its backup files. // aware that one file can and only can hold one table. 
func MapTableToFiles(files []*backup.File) map[int64][]*backup.File { - result := map[int64][]*backup.File{ } + result := map[int64][]*backup.File{} for _, file := range files { - tableID := tablecodec.DecodeTableID(file.GetStartKey()) + tableID := tablecodec.DecodeTableID(file.GetStartKey()) tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) if tableID != tableEndID { log.Warn("key range spread between many files.", @@ -172,7 +185,7 @@ func GoValidateFileRanges( tableStream <-chan CreatedTable, fileOfTable map[int64][]*backup.File, errCh chan<- error, - ) <-chan TableWithRange { +) <-chan TableWithRange { outCh := make(chan TableWithRange, len(fileOfTable)) go func() { defer close(outCh) @@ -185,7 +198,7 @@ func GoValidateFileRanges( return default: } - files := fileOfTable[t.OldTable.ID] + files := fileOfTable[t.OldTable.Info.ID] ranges, err := ValidateFileRanges(files, t.RewriteRule) if err != nil { errCh <- err @@ -221,7 +234,7 @@ func validateAndGetFileRange(file *backup.File, rules *RewriteRules) (rtree.Rang zap.Stringer("file", file)) return rtree.Range{}, errors.New("table ids dont match") } - r := rtree.Range{ StartKey: file.GetStartKey(), EndKey: file.GetEndKey() } + r := rtree.Range{StartKey: file.GetStartKey(), EndKey: file.GetEndKey()} return r, nil } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index e8163bb43..09ace8a27 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -4,11 +4,13 @@ package task import ( "context" + "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/log" "github.com/pingcap/parser/model" + "github.com/pingcap/pd/v4/server/schedule/placement" "github.com/pingcap/tidb-tools/pkg/filter" "github.com/pingcap/tidb/config" "github.com/spf13/pflag" @@ -174,7 +176,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } - errCh := make(chan error, 2) + errCh := make(chan error, 32) tableStream := client.GoCreateTables(ctx, mgr.GetDomain(), tables, newTS, errCh) placementRules, err := client.GetPlacementRules(cfg.PD) if err != nil { @@ -184,52 +186,15 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf tableFileMap := restore.MapTableToFiles(files) rangeStream := restore.GoValidateFileRanges(ctx, tableStream, tableFileMap, errCh) - var newTables []*model.TableInfo - var ranges []rtree.Range - rewriteRules := &restore.RewriteRules{ - Table: []*import_sstpb.RewriteRule{}, - Data: []*import_sstpb.RewriteRule{}, - } - for ct := range rangeStream { - newTables = append(newTables, ct.Table) - ranges = append(ranges, ct.Range...) - rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) - rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) - } - log.Debug("Go back to sequential path.", - zap.Int("files", len(files)), - zap.Int("new tables", len(newTables)), - zap.Int("ranges", len(ranges))) - select { - case err, ok := <-errCh: - if ok { - return err - } - default: - } - - err = client.RemoveTiFlashReplica(tables, newTables, placementRules) - if err != nil { - return err - } - - defer func() { - _ = client.RecoverTiFlashReplica(tables) - }() - - summary.CollectInt("restore ranges", len(ranges)) - - if err = splitPrepareWork(ctx, client, newTables); err != nil { - return err - } - + rangeSize := restore.EstimateRangeSize(files) + summary.CollectInt("restore ranges", rangeSize) // Redirect to log if there is no log file to avoid unreadable output. 
updateCh := g.StartProgress( ctx, cmdName, // Split/Scatter + Download/Ingest - int64(len(ranges)+len(files)), + int64(restore.EstimateRangeSize(files)+len(files)), !cfg.LogProgress) clusterCfg, err := restorePreWork(ctx, client, mgr) @@ -258,33 +223,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf rejectStoreMap[store.GetId()] = true } - for { - if len(ranges) == 0 { - break - } - batchSize = utils.MinInt(batchSize, len(ranges)) - var rangeBatch []rtree.Range - ranges, rangeBatch = ranges[batchSize:], ranges[0:batchSize:batchSize] - - // Split regions by the given rangeBatch. - err = restore.SplitRanges(ctx, client, rangeBatch, rewriteRules, updateCh) - if err != nil { - log.Error("split regions failed", zap.Error(err)) - return err - } - - // Collect related files in the given rangeBatch. - fileBatch := make([]*backup.File, 0, 2*len(rangeBatch)) - for _, rg := range rangeBatch { - fileBatch = append(fileBatch, rg.Files...) - } - - // After split, we can restore backup files. - err = client.RestoreFiles(fileBatch, rewriteRules, rejectStoreMap, updateCh) - if err != nil { - break - } - } + afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, updateCh, rejectStoreMap, errCh) // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers @@ -292,15 +231,34 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf err = errRestorePostWork } - if errSplitPostWork := splitPostWork(ctx, client, newTables); err == nil { - err = errSplitPostWork - } - // If any error happened, return now, don't execute checksum. if err != nil { return err } + var newTables []*model.TableInfo + var ranges []rtree.Range + rewriteRules := &restore.RewriteRules{ + Table: []*import_sstpb.RewriteRule{}, + Data: []*import_sstpb.RewriteRule{}, + } + for ct := range afterRestoreStream { + newTables = append(newTables, ct.Table) + ranges = append(ranges, ct.Range...) + rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) + rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) + } + log.Debug("Go back to sequential path.", + zap.Int("files", len(files)), + zap.Int("new tables", len(newTables)), + zap.Int("ranges", len(ranges))) + select { + case err, ok := <-errCh: + if ok { + return err + } + default: + } // Restore has finished. updateCh.Close() @@ -501,3 +459,94 @@ func enableTiDBConfig() { config.StoreGlobalConfig(conf) } + +// goRestore forks a goroutine to do the restore process. +// TODO: use a struct to contain general data structs(like, client + ctx + updateCh). +// NOTE: is ctx.WithValue() a good idea? It would be simpler but will broken the type-constraint. +func goRestore( + ctx context.Context, + inputCh <-chan restore.TableWithRange, + rules []placement.Rule, + client *restore.Client, + updateCh glue.Progress, + rejectStoreMap map[uint64]bool, + errCh chan<- error, +) <-chan restore.TableWithRange { + outCh := make(chan restore.TableWithRange) + go func() { + // We cache old tables so that we can 'batch' recover TiFlash and tables. 
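+		// Both slices are only read by the deferred cleanup below: newTables to
+		// undo the temporary placement rules, oldTables to recover TiFlash replicas.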
+ oldTables := []*utils.Table{} + newTables := []*model.TableInfo{} + defer close(outCh) + defer func() { + if err := splitPostWork(ctx, client, newTables); err != nil { + log.Error("failed on unset online restore placement rules", zap.Error(err)) + errCh <- err + } + if err := client.RecoverTiFlashReplica(oldTables); err != nil { + log.Error("failed on recover TiFlash replicas", zap.Error(err)) + errCh <- err + } + }() + for t := range inputCh { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + default: + } + // Omit the number of TiFlash have been removed. + if _, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules); err != nil { + log.Error("failed on remove TiFlash replicas", zap.Error(err)) + errCh <- err + return + } + oldTables = append(oldTables, t.OldTable) + + // Reusage of splitPrepareWork would be safe. + // But this operation sometime would be costly. + if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { + log.Error("failed on set online restore placement rules", zap.Error(err)) + errCh <- err + return + } + newTables = append(newTables, t.Table) + + // TODO: add batch / concurrence limit. + if err := restore.SplitRanges(ctx, client, t.Range, t.RewriteRule, updateCh); err != nil { + log.Error("failed on split range", + zap.Stringer("database", t.OldTable.Db.Name), + zap.Stringer("table", t.OldTable.Info.Name), + zap.Any("ranges", t.Range), + zap.Error(err), + ) + errCh <- err + return + } + + files := []*backup.File{} + for _, rng := range t.Range { + files = append(files, rng.Files...) + } + log.Info("restoring table", + zap.Stringer("database", t.OldTable.Db.Name), + zap.Stringer("table", t.OldTable.Info.Name), + zap.Int("ranges", len(t.Range)), + zap.Int("files", len(files)), + ) + + if err := client.RestoreFiles(files, t.RewriteRule, rejectStoreMap, updateCh); err != nil { + log.Error("failed on download & ingest", + zap.Stringer("database", t.OldTable.Db.Name), + zap.Stringer("table", t.OldTable.Info.Name), + zap.Any("files", files), + zap.Error(err), + ) + errCh <- err + return + } + + outCh <- t + } + }() + return outCh +} From f258d99afdabf2a1b805d6bd5f4e6d756c57890d Mon Sep 17 00:00:00 2001 From: Hillium Date: Mon, 27 Apr 2020 15:49:47 +0800 Subject: [PATCH 04/52] restore, task: use batching when pipelining. --- pkg/restore/pipeline_items.go | 100 ++++++++++++++++++++++++++++++++++ pkg/restore/range.go | 6 ++ pkg/task/restore.go | 50 ++++++----------- 3 files changed, 122 insertions(+), 34 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 681bf1ab1..5ff80dc38 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -3,9 +3,17 @@ package restore import ( + "context" + "fmt" + + "github.com/pingcap/br/pkg/glue" "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/br/pkg/utils" + "github.com/pingcap/kvproto/pkg/backup" + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/log" "github.com/pingcap/parser/model" + "go.uber.org/zap" ) // CreatedTable is a table is created on restore process, @@ -22,3 +30,95 @@ type TableWithRange struct { Range []rtree.Range } + +// Batcher collectes ranges to restore and send batching split/ingest request. +// TODO: support cross-table batching. (i.e. 
one table can occur in two batches) +type Batcher struct { + currentBatch []TableWithRange + currentSize int + + ctx context.Context + client *Client + rejectStoreMap map[uint64]bool + updateCh glue.Progress + BatchSizeThreshold int +} + +// NewBatcher creates a new batcher by client and updateCh. +func NewBatcher( + ctx context.Context, + client *Client, + rejectStoreMap map[uint64]bool, + updateCh glue.Progress, +) *Batcher { + return &Batcher{ + currentBatch: []TableWithRange{}, + client: client, + rejectStoreMap: rejectStoreMap, + updateCh: updateCh, + ctx: ctx, + BatchSizeThreshold: 1, + } +} + +// Send sends all pending requests in the batcher. +func (b *Batcher) Send() error { + ranges := []rtree.Range{} + rewriteRules := &RewriteRules{ + Table: []*import_sstpb.RewriteRule{}, + Data: []*import_sstpb.RewriteRule{}, + } + for _, t := range b.currentBatch { + ranges = append(ranges, t.Range...) + rewriteRules.Append(*t.RewriteRule) + } + + tableNames := []string{} + for _, t := range b.currentBatch { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Info("sending batch restore request", + zap.Int("range count", len(ranges)), + zap.Int("table count", len(b.currentBatch)), + zap.Strings("tables", tableNames), + ) + + if err := SplitRanges(b.ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { + log.Error("failed on split range", + zap.Any("ranges", ranges), + zap.Error(err), + ) + return err + } + + files := []*backup.File{} + for _, fs := range ranges { + files = append(files, fs.Files...) + } + if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { + return err + } + b.currentBatch = []TableWithRange{} + b.currentSize = 0 + return nil +} + +func (b *Batcher) sendIfFull() error { + if b.currentSize >= b.BatchSizeThreshold { + return b.Send() + } + return nil +} + +// AddRangeCount add current size of the Batcher, +// and sends cached requests if current size is greater than BatchThreshold. +func (b *Batcher) AddRangeCount(by int) error { + b.currentSize += by + return b.sendIfFull() +} + +// Add addes a task to bather. +func (b *Batcher) Add(tbs TableWithRange) error { + b.currentBatch = append(b.currentBatch, tbs) + return b.AddRangeCount(len(tbs.Range)) +} diff --git a/pkg/restore/range.go b/pkg/restore/range.go index 0d5192ca9..c88cd74ad 100644 --- a/pkg/restore/range.go +++ b/pkg/restore/range.go @@ -68,3 +68,9 @@ type RewriteRules struct { Table []*import_sstpb.RewriteRule Data []*import_sstpb.RewriteRule } + +// Combine append its argument to this rewrite rules. +func (r *RewriteRules) Append(other RewriteRules) { + r.Data = append(r.Data, other.Data...) + r.Table = append(r.Table, other.Table...) 
+} diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 09ace8a27..8cad772d5 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -223,7 +223,9 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf rejectStoreMap[store.GetId()] = true } - afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, updateCh, rejectStoreMap, errCh) + batcher := restore.NewBatcher(ctx, client, rejectStoreMap, updateCh) + batcher.BatchSizeThreshold = batchSize + afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers @@ -468,8 +470,7 @@ func goRestore( inputCh <-chan restore.TableWithRange, rules []placement.Rule, client *restore.Client, - updateCh glue.Progress, - rejectStoreMap map[uint64]bool, + batcher *restore.Batcher, errCh chan<- error, ) <-chan restore.TableWithRange { outCh := make(chan restore.TableWithRange) @@ -479,6 +480,10 @@ func goRestore( newTables := []*model.TableInfo{} defer close(outCh) defer func() { + log.Info("doing postwork", + zap.Int("new tables", len(newTables)), + zap.Int("old tables", len(oldTables)), + ) if err := splitPostWork(ctx, client, newTables); err != nil { log.Error("failed on unset online restore placement rules", zap.Error(err)) errCh <- err @@ -511,42 +516,19 @@ func goRestore( } newTables = append(newTables, t.Table) - // TODO: add batch / concurrence limit. - if err := restore.SplitRanges(ctx, client, t.Range, t.RewriteRule, updateCh); err != nil { - log.Error("failed on split range", - zap.Stringer("database", t.OldTable.Db.Name), - zap.Stringer("table", t.OldTable.Info.Name), - zap.Any("ranges", t.Range), - zap.Error(err), - ) - errCh <- err - return - } - - files := []*backup.File{} - for _, rng := range t.Range { - files = append(files, rng.Files...) - } - log.Info("restoring table", - zap.Stringer("database", t.OldTable.Db.Name), - zap.Stringer("table", t.OldTable.Info.Name), - zap.Int("ranges", len(t.Range)), - zap.Int("files", len(files)), - ) - - if err := client.RestoreFiles(files, t.RewriteRule, rejectStoreMap, updateCh); err != nil { - log.Error("failed on download & ingest", - zap.Stringer("database", t.OldTable.Db.Name), - zap.Stringer("table", t.OldTable.Info.Name), - zap.Any("files", files), - zap.Error(err), - ) + if err := batcher.Add(t); err != nil { errCh <- err return } - + outCh <- t } + + // when things done, we must clean pending requests. + if err := batcher.Send(); err != nil { + errCh <- err + return + } }() return outCh } From cefb696ceb8e4d4571b003ab89f2db89c66c5aaf Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 28 Apr 2020 11:35:26 +0800 Subject: [PATCH 05/52] restore: batcher split by range(instead of table). 
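
The batcher now accumulates key ranges instead of whole tables and cuts a batch
once BatchSizeThreshold ranges are pending, so a single huge table no longer has
to be sent as one oversized split/ingest request. A rough usage sketch of the
batcher API after this patch follows; NewBatcher, BatchSizeThreshold, Add and
Close are the real API introduced below, while restoreStream, the 128 threshold
and the imports it would need (context, restore, glue) are only illustrative:

    // restoreStream is a sketch, not code in this patch: it shows how the
    // batcher is meant to be driven by the table/range pipeline.
    func restoreStream(ctx context.Context, client *restore.Client,
        updateCh glue.Progress, tableStream <-chan restore.TableWithRange) error {
        batcher := restore.NewBatcher(ctx, client, updateCh)
        batcher.BatchSizeThreshold = 128 // illustrative value, not a project default
        for t := range tableStream {
            // Add queues the table's ranges and rewrite rules; it triggers Send
            // once enough ranges are pending, so one big table may span batches.
            if err := batcher.Add(t); err != nil {
                return err
            }
        }
        // Close flushes the remaining ranges and closes the progress updater.
        return batcher.Close()
    }
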
--- pkg/restore/pipeline_items.go | 93 ++++++++++++++++++++--------------- pkg/restore/range.go | 8 +++ pkg/task/restore.go | 22 ++------- 3 files changed, 67 insertions(+), 56 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 5ff80dc38..4a14809d7 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -4,13 +4,13 @@ package restore import ( "context" - "fmt" + "github.com/pingcap/br/pkg/conn" "github.com/pingcap/br/pkg/glue" "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/br/pkg/utils" + "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" - "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/log" "github.com/pingcap/parser/model" "go.uber.org/zap" @@ -32,10 +32,9 @@ type TableWithRange struct { } // Batcher collectes ranges to restore and send batching split/ingest request. -// TODO: support cross-table batching. (i.e. one table can occur in two batches) type Batcher struct { - currentBatch []TableWithRange - currentSize int + currentBatch []rtree.Range + rewriteRules *RewriteRules ctx context.Context client *Client @@ -44,15 +43,31 @@ type Batcher struct { BatchSizeThreshold int } +// Len calculate the current size of this batcher. +func (b *Batcher) Len() int { + return len(b.currentBatch) +} + // NewBatcher creates a new batcher by client and updateCh. func NewBatcher( ctx context.Context, client *Client, - rejectStoreMap map[uint64]bool, updateCh glue.Progress, ) *Batcher { + tiflashStores, err := conn.GetAllTiKVStores(ctx, client.GetPDClient(), conn.TiFlashOnly) + if err != nil { + // After TiFlash support restore, we can remove this panic. + // The origin of this panic is at RunRestore, and its semantic is nearing panic, don't worry about it. + log.Panic("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) + } + rejectStoreMap := make(map[uint64]bool) + for _, store := range tiflashStores { + rejectStoreMap[store.GetId()] = true + } + return &Batcher{ - currentBatch: []TableWithRange{}, + currentBatch: []rtree.Range{}, + rewriteRules: EmptyRewriteRule(), client: client, rejectStoreMap: rejectStoreMap, updateCh: updateCh, @@ -61,29 +76,20 @@ func NewBatcher( } } -// Send sends all pending requests in the batcher. -func (b *Batcher) Send() error { - ranges := []rtree.Range{} - rewriteRules := &RewriteRules{ - Table: []*import_sstpb.RewriteRule{}, - Data: []*import_sstpb.RewriteRule{}, - } - for _, t := range b.currentBatch { - ranges = append(ranges, t.Range...) - rewriteRules.Append(*t.RewriteRule) +func (b *Batcher) splitPoint() int { + splitPoint := b.BatchSizeThreshold + if splitPoint > b.Len() { + return b.Len() } + return splitPoint +} - tableNames := []string{} - for _, t := range b.currentBatch { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Info("sending batch restore request", - zap.Int("range count", len(ranges)), - zap.Int("table count", len(b.currentBatch)), - zap.Strings("tables", tableNames), - ) +// Send sends all pending requests in the batcher. 
+func (b *Batcher) Send() error { + var ranges []rtree.Range + ranges, b.currentBatch = b.currentBatch[:b.splitPoint()], b.currentBatch[b.splitPoint():] - if err := SplitRanges(b.ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { + if err := SplitRanges(b.ctx, b.client, ranges, b.rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", zap.Any("ranges", ranges), zap.Error(err), @@ -95,30 +101,39 @@ func (b *Batcher) Send() error { for _, fs := range ranges { files = append(files, fs.Files...) } - if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { + log.Info("send batch", + zap.Int("range count", len(ranges)), + zap.Int("file count", len(files)), + ) + if err := b.client.RestoreFiles(files, b.rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { return err } - b.currentBatch = []TableWithRange{} - b.currentSize = 0 return nil } func (b *Batcher) sendIfFull() error { - if b.currentSize >= b.BatchSizeThreshold { + if b.Len() >= b.BatchSizeThreshold { return b.Send() } return nil } -// AddRangeCount add current size of the Batcher, -// and sends cached requests if current size is greater than BatchThreshold. -func (b *Batcher) AddRangeCount(by int) error { - b.currentSize += by +// Add addes a task to bather. +func (b *Batcher) Add(tbs TableWithRange) error { + log.Info("adding table to batch", + zap.Stringer("table", tbs.Table.Name), + zap.Stringer("database", tbs.OldTable.Db.Name), + zap.Int64("old id", tbs.OldTable.Info.ID), + zap.Int64("new id", tbs.Table.ID), + zap.Int("batch size", b.Len()), + ) + b.currentBatch = append(b.currentBatch, tbs.Range...) + b.rewriteRules.Append(*tbs.RewriteRule) return b.sendIfFull() } -// Add addes a task to bather. -func (b *Batcher) Add(tbs TableWithRange) error { - b.currentBatch = append(b.currentBatch, tbs) - return b.AddRangeCount(len(tbs.Range)) +// Close closes the batcher, sending all pending requests, close updateCh. +func (b *Batcher) Close() error { + defer b.updateCh.Close() + return b.Send() } diff --git a/pkg/restore/range.go b/pkg/restore/range.go index c88cd74ad..924c63ebb 100644 --- a/pkg/restore/range.go +++ b/pkg/restore/range.go @@ -74,3 +74,11 @@ func (r *RewriteRules) Append(other RewriteRules) { r.Data = append(r.Data, other.Data...) r.Table = append(r.Table, other.Table...) } + +// EmptyRewriteRule make a new, empty rewrite rule. +func EmptyRewriteRule() *RewriteRules { + return &RewriteRules{ + Table: []*import_sstpb.RewriteRule{}, + Data: []*import_sstpb.RewriteRule{}, + } +} diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 8cad772d5..4f276c97f 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -214,16 +214,8 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Restore sst files in batch. 
batchSize := utils.MinInt(int(cfg.Concurrency), maxRestoreBatchSizeLimit) - tiflashStores, err := conn.GetAllTiKVStores(ctx, client.GetPDClient(), conn.TiFlashOnly) - if err != nil { - return errors.Trace(err) - } - rejectStoreMap := make(map[uint64]bool) - for _, store := range tiflashStores { - rejectStoreMap[store.GetId()] = true - } - batcher := restore.NewBatcher(ctx, client, rejectStoreMap, updateCh) + batcher := restore.NewBatcher(ctx, client, updateCh) batcher.BatchSizeThreshold = batchSize afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) @@ -255,14 +247,11 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf zap.Int("new tables", len(newTables)), zap.Int("ranges", len(ranges))) select { - case err, ok := <-errCh: - if ok { - return err - } + case err := <-errCh: + return err default: } - // Restore has finished. - updateCh.Close() + // Checksum if cfg.Checksum { @@ -464,7 +453,6 @@ func enableTiDBConfig() { // goRestore forks a goroutine to do the restore process. // TODO: use a struct to contain general data structs(like, client + ctx + updateCh). -// NOTE: is ctx.WithValue() a good idea? It would be simpler but will broken the type-constraint. func goRestore( ctx context.Context, inputCh <-chan restore.TableWithRange, @@ -525,7 +513,7 @@ func goRestore( } // when things done, we must clean pending requests. - if err := batcher.Send(); err != nil { + if err := batcher.Close(); err != nil { errCh <- err return } From 7846324a207e423d5da23ab9cf537dcdcc320e66 Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 28 Apr 2020 14:39:35 +0800 Subject: [PATCH 06/52] restore,task: new way to for polling errCh. We use select instead of for range, so we can send error when context cancelled. --- pkg/restore/range.go | 2 +- pkg/restore/util.go | 42 +++++++------ pkg/task/restore.go | 136 +++++++++++++++++++++---------------------- 3 files changed, 92 insertions(+), 88 deletions(-) diff --git a/pkg/restore/range.go b/pkg/restore/range.go index 924c63ebb..df6a55546 100644 --- a/pkg/restore/range.go +++ b/pkg/restore/range.go @@ -69,7 +69,7 @@ type RewriteRules struct { Data []*import_sstpb.RewriteRule } -// Combine append its argument to this rewrite rules. +// Append append its argument to this rewrite rules. func (r *RewriteRules) Append(other RewriteRules) { r.Data = append(r.Data, other.Data...) r.Table = append(r.Table, other.Table...) diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 8d925caef..a2611bc09 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -180,40 +180,44 @@ func MapTableToFiles(files []*backup.File) map[int64][]*backup.File { return result } +// GoValidateFileRanges validate files by a stream of tables and yields tables with range. func GoValidateFileRanges( ctx context.Context, tableStream <-chan CreatedTable, fileOfTable map[int64][]*backup.File, errCh chan<- error, ) <-chan TableWithRange { + // Could we have a smaller outCh size? 
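+	// One slot per table that has files: range generation never blocks on the
+	// downstream consumer, at the cost of O(tables) buffered entries.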
outCh := make(chan TableWithRange, len(fileOfTable)) go func() { defer close(outCh) defer log.Info("all range generated") - - for t := range tableStream { + for { select { case <-ctx.Done(): errCh <- ctx.Err() return - default: - } - files := fileOfTable[t.OldTable.Info.ID] - ranges, err := ValidateFileRanges(files, t.RewriteRule) - if err != nil { - errCh <- err - return - } - tableWithRange := TableWithRange{ - CreatedTable: t, - Range: AttachFilesToRanges(files, ranges), + case t, ok := <-tableStream: + if !ok { + return + } + files := fileOfTable[t.OldTable.Info.ID] + ranges, err := ValidateFileRanges(files, t.RewriteRule) + if err != nil { + errCh <- err + return + } + tableWithRange := TableWithRange{ + CreatedTable: t, + Range: AttachFilesToRanges(files, ranges), + } + log.Debug("sending range info", + zap.Stringer("table", t.Table.Name), + zap.Int("files", len(files)), + zap.Int("range size", len(ranges)), + zap.Int("output channel size", len(outCh))) + outCh <- tableWithRange } - log.Debug("sending range info", - zap.Stringer("table", t.Table.Name), - zap.Int("files", len(files)), - zap.Int("range size", len(ranges)), - zap.Int("output channel size", len(outCh))) - outCh <- tableWithRange } }() return outCh diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 4f276c97f..cc47a192d 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -7,7 +7,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" - "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/log" "github.com/pingcap/parser/model" "github.com/pingcap/pd/v4/server/schedule/placement" @@ -176,6 +175,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } + // We make bigger errCh so we won't block on multi-part failed. errCh := make(chan error, 32) tableStream := client.GoCreateTables(ctx, mgr.GetDomain(), tables, newTS, errCh) placementRules, err := client.GetPlacementRules(cfg.PD) @@ -214,11 +214,12 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Restore sst files in batch. batchSize := utils.MinInt(int(cfg.Concurrency), maxRestoreBatchSizeLimit) - batcher := restore.NewBatcher(ctx, client, updateCh) batcher.BatchSizeThreshold = batchSize afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) + newTables, _, _, err := collectRestoreResults(ctx, afterRestoreStream, errCh) + // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers if errRestorePostWork := restorePostWork(ctx, client, mgr, clusterCfg); err == nil { @@ -230,39 +231,14 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return err } - var newTables []*model.TableInfo - var ranges []rtree.Range - rewriteRules := &restore.RewriteRules{ - Table: []*import_sstpb.RewriteRule{}, - Data: []*import_sstpb.RewriteRule{}, - } - for ct := range afterRestoreStream { - newTables = append(newTables, ct.Table) - ranges = append(ranges, ct.Range...) - rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) - rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) 
- } - log.Debug("Go back to sequential path.", - zap.Int("files", len(files)), - zap.Int("new tables", len(newTables)), - zap.Int("ranges", len(ranges))) - select { - case err := <-errCh: - return err - default: - } - - // Checksum - if cfg.Checksum { - updateCh = g.StartProgress( - ctx, "Checksum", int64(len(newTables)), !cfg.LogProgress) - err = client.ValidateChecksum( - ctx, mgr.GetTiKV().GetClient(), tables, newTables, updateCh) - if err != nil { - return err - } - updateCh.Close() + // TODO: add checksum to pipeline. + updateCh = g.StartProgress( + ctx, "Checksum", int64(len(newTables)), !cfg.LogProgress) + err = client.ValidateChecksum( + ctx, mgr.GetTiKV().GetClient(), tables, newTables, updateCh) + if err != nil { + return err } // Set task summary to success status. @@ -451,8 +427,29 @@ func enableTiDBConfig() { config.StoreGlobalConfig(conf) } +// collectRestoreResults collectes result of pipelined restore process, +// block the current goroutine, until all the tasks finished. +// TODO: remove this function when all the link +func collectRestoreResults( + ctx context.Context, + ch <-chan restore.TableWithRange, + errCh <-chan error, +) (newTables []*model.TableInfo, ranges []rtree.Range, rewriteRules *restore.RewriteRules, err error) { + rewriteRules = restore.EmptyRewriteRule() + for ct := range ch { + newTables = append(newTables, ct.Table) + ranges = append(ranges, ct.Range...) + rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) + rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) + } + select { + case err = <-errCh: + default: + } + return +} + // goRestore forks a goroutine to do the restore process. -// TODO: use a struct to contain general data structs(like, client + ctx + updateCh). func goRestore( ctx context.Context, inputCh <-chan restore.TableWithRange, @@ -466,9 +463,13 @@ func goRestore( // We cache old tables so that we can 'batch' recover TiFlash and tables. oldTables := []*utils.Table{} newTables := []*model.TableInfo{} - defer close(outCh) defer func() { - log.Info("doing postwork", + // when things done, we must clean pending requests. + if err := batcher.Close(); err != nil { + errCh <- err + return + } + log.Info("doing postwork", zap.Int("new tables", len(newTables)), zap.Int("old tables", len(oldTables)), ) @@ -480,42 +481,41 @@ func goRestore( log.Error("failed on recover TiFlash replicas", zap.Error(err)) errCh <- err } + close(outCh) }() - for t := range inputCh { + + for { select { case <-ctx.Done(): errCh <- ctx.Err() - default: + case t, ok := <-inputCh: + if !ok { + return + } + // Omit the number of TiFlash have been removed. + if _, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules); err != nil { + log.Error("failed on remove TiFlash replicas", zap.Error(err)) + errCh <- err + return + } + oldTables = append(oldTables, t.OldTable) + + // Reusage of splitPrepareWork would be safe. + // But this operation sometime would be costly. + if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { + log.Error("failed on set online restore placement rules", zap.Error(err)) + errCh <- err + return + } + newTables = append(newTables, t.Table) + + if err := batcher.Add(t); err != nil { + errCh <- err + return + } + + outCh <- t } - // Omit the number of TiFlash have been removed. 
- if _, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules); err != nil { - log.Error("failed on remove TiFlash replicas", zap.Error(err)) - errCh <- err - return - } - oldTables = append(oldTables, t.OldTable) - - // Reusage of splitPrepareWork would be safe. - // But this operation sometime would be costly. - if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { - log.Error("failed on set online restore placement rules", zap.Error(err)) - errCh <- err - return - } - newTables = append(newTables, t.Table) - - if err := batcher.Add(t); err != nil { - errCh <- err - return - } - - outCh <- t - } - - // when things done, we must clean pending requests. - if err := batcher.Close(); err != nil { - errCh <- err - return } }() return outCh From c78a24bdc7d642d649f3f3964ee6d09895ef80a4 Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 28 Apr 2020 17:29:31 +0800 Subject: [PATCH 07/52] restore, task: pipelining checksum. --- go.mod | 1 + pkg/restore/client.go | 177 +++++++++++++++++----------------- pkg/restore/pipeline_items.go | 40 ++++++-- pkg/task/restore.go | 40 ++++---- 4 files changed, 144 insertions(+), 114 deletions(-) diff --git a/go.mod b/go.mod index 986893afe..849a29148 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( go.opencensus.io v0.22.2 // indirect go.uber.org/zap v1.14.1 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 + golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e google.golang.org/api v0.14.0 google.golang.org/grpc v1.25.1 ) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index f6be60801..a576b9cfc 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -30,6 +30,7 @@ import ( "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" "go.uber.org/zap" + "golang.org/x/sync/errgroup" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials" @@ -378,31 +379,34 @@ func (rc *Client) GoCreateTables( defer close(outCh) defer log.Info("all tables created") + group, ectx := errgroup.WithContext(ctx) + for _, table := range tables { - select { - case <-ctx.Done(): - log.Error("create table canceled", - zap.Error(ctx.Err()), - zap.Stringer("table", table.Info.Name), - zap.Stringer("database", table.Db.Name)) - errCh <- ctx.Err() - return - default: - } - rt, err := rc.createTable(dom, table, newTS) - if err != nil { - log.Error("create table failed", - zap.Error(err), - zap.Stringer("table", table.Info.Name), - zap.Stringer("database", table.Db.Name)) - errCh <- err - return - } - log.Debug("table created and send to next", - zap.Int("output chan size", len(outCh)), - zap.Stringer("table", table.Info.Name), - zap.Stringer("database", table.Db.Name)) - outCh <- rt + t := table + group.Go(func() error { + select { + case <-ectx.Done(): + return ectx.Err() + default: + } + rt, err := rc.createTable(dom, t, newTS) + if err != nil { + log.Error("create table failed", + zap.Error(err), + zap.Stringer("table", t.Info.Name), + zap.Stringer("database", t.Db.Name)) + return err + } + log.Debug("table created and send to next", + zap.Int("output chan size", len(outCh)), + zap.Stringer("table", t.Info.Name), + zap.Stringer("database", t.Db.Name)) + outCh <- rt + return nil + }) + } + if err := group.Wait(); err != nil { + errCh <- err } }() return outCh @@ -712,14 +716,15 @@ func (rc *Client) switchTiKVMode(ctx context.Context, mode import_sstpb.SwitchMo return nil } -//ValidateChecksum validate checksum after restore +// ValidateChecksum validate checksum after 
restore +// it returns a channel fires a struct{} when all things get done. func (rc *Client) ValidateChecksum( ctx context.Context, + tableStream <-chan TableWithRange, kvClient kv.Client, - tables []*utils.Table, - newTables []*model.TableInfo, + errCh chan<- error, updateCh glue.Progress, -) error { +) <-chan struct{} { start := time.Now() defer func() { elapsed := time.Since(start) @@ -727,74 +732,70 @@ func (rc *Client) ValidateChecksum( }() log.Info("Start to validate checksum") - wg := new(sync.WaitGroup) - errCh := make(chan error) + outCh := make(chan struct{}, 1) workers := utils.NewWorkerPool(defaultChecksumConcurrency, "RestoreChecksum") go func() { - for i, t := range tables { - table := t - newTable := newTables[i] - wg.Add(1) - workers.Apply(func() { - defer wg.Done() - - if table.NoChecksum() { - log.Info("table doesn't have checksum, skipping checksum", - zap.Stringer("db", table.Db.Name), - zap.Stringer("table", table.Info.Name)) - updateCh.Inc() - return - } - - startTS, err := rc.GetTS(ctx) - if err != nil { - errCh <- errors.Trace(err) - return - } - exe, err := checksum.NewExecutorBuilder(newTable, startTS). - SetOldTable(table). - Build() - if err != nil { - errCh <- errors.Trace(err) + defer func() { + log.Info("all checksum ended") + outCh <- struct{}{} + close(outCh) + }() + for { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + case tbl, ok := <-tableStream: + if !ok { return } - checksumResp, err := exe.Execute(ctx, kvClient, func() { - // TODO: update progress here. + workers.Apply(func() { + err := rc.execChecksum(ctx, tbl.CreatedTable, kvClient) + if err != nil { + errCh <- err + } + updateCh.Inc() }) - if err != nil { - errCh <- errors.Trace(err) - return - } - - if checksumResp.Checksum != table.Crc64Xor || - checksumResp.TotalKvs != table.TotalKvs || - checksumResp.TotalBytes != table.TotalBytes { - log.Error("failed in validate checksum", - zap.String("database", table.Db.Name.L), - zap.String("table", table.Info.Name.L), - zap.Uint64("origin tidb crc64", table.Crc64Xor), - zap.Uint64("calculated crc64", checksumResp.Checksum), - zap.Uint64("origin tidb total kvs", table.TotalKvs), - zap.Uint64("calculated total kvs", checksumResp.TotalKvs), - zap.Uint64("origin tidb total bytes", table.TotalBytes), - zap.Uint64("calculated total bytes", checksumResp.TotalBytes), - ) - errCh <- errors.New("failed to validate checksum") - return - } - - updateCh.Inc() - }) + } } - wg.Wait() - close(errCh) }() - for err := range errCh { - if err != nil { - return err - } + return outCh +} + +func (rc *Client) execChecksum(ctx context.Context, tbl CreatedTable, kvClient kv.Client) error { + startTS, err := rc.GetTS(ctx) + if err != nil { + return errors.Trace(err) + } + exe, err := checksum.NewExecutorBuilder(tbl.Table, startTS). + SetOldTable(tbl.OldTable). + Build() + if err != nil { + return errors.Trace(err) + } + checksumResp, err := exe.Execute(ctx, kvClient, func() { + // TODO: update progress here. 
+ }) + if err != nil { + return errors.Trace(err) + + } + + table := tbl.OldTable + if checksumResp.Checksum != table.Crc64Xor || + checksumResp.TotalKvs != table.TotalKvs || + checksumResp.TotalBytes != table.TotalBytes { + log.Error("failed in validate checksum", + zap.String("database", table.Db.Name.L), + zap.String("table", table.Info.Name.L), + zap.Uint64("origin tidb crc64", table.Crc64Xor), + zap.Uint64("calculated crc64", checksumResp.Checksum), + zap.Uint64("origin tidb total kvs", table.TotalKvs), + zap.Uint64("calculated total kvs", checksumResp.TotalKvs), + zap.Uint64("origin tidb total bytes", table.TotalBytes), + zap.Uint64("calculated total bytes", checksumResp.TotalBytes), + ) + return errors.New("failed to validate checksum") } - log.Info("validate checksum passed!!") return nil } diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 4a14809d7..ad624f62d 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -4,6 +4,7 @@ package restore import ( "context" + "fmt" "github.com/pingcap/br/pkg/conn" "github.com/pingcap/br/pkg/glue" @@ -34,6 +35,7 @@ type TableWithRange struct { // Batcher collectes ranges to restore and send batching split/ingest request. type Batcher struct { currentBatch []rtree.Range + cachedTables []TableWithRange rewriteRules *RewriteRules ctx context.Context @@ -66,7 +68,6 @@ func NewBatcher( } return &Batcher{ - currentBatch: []rtree.Range{}, rewriteRules: EmptyRewriteRule(), client: client, rejectStoreMap: rejectStoreMap, @@ -84,17 +85,36 @@ func (b *Batcher) splitPoint() int { return splitPoint } +// drainSentTables drains the table just sent. +// note that this function assumes you call it only after a sent of bench. +func (b *Batcher) drainSentTables() (drained []TableWithRange) { + if b.Len() == 0 { + drained, b.cachedTables = b.cachedTables, []TableWithRange{} + return + } + cachedLen := len(b.cachedTables) + drained, b.cachedTables = b.cachedTables[:cachedLen-1], b.cachedTables[cachedLen-1:] + return +} + // Send sends all pending requests in the batcher. -func (b *Batcher) Send() error { +// returns tables sent in the current batch. +func (b *Batcher) Send() ([]TableWithRange, error) { var ranges []rtree.Range ranges, b.currentBatch = b.currentBatch[:b.splitPoint()], b.currentBatch[b.splitPoint():] + tbs := b.drainSentTables() + var tableNames []string + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Info("prepare split range by tables", zap.Strings("tables", tableNames)) if err := SplitRanges(b.ctx, b.client, ranges, b.rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", zap.Any("ranges", ranges), zap.Error(err), ) - return err + return nil, err } files := []*backup.File{} @@ -106,20 +126,21 @@ func (b *Batcher) Send() error { zap.Int("file count", len(files)), ) if err := b.client.RestoreFiles(files, b.rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { - return err + return nil, err } - return nil + + return tbs, nil } -func (b *Batcher) sendIfFull() error { +func (b *Batcher) sendIfFull() ([]TableWithRange, error) { if b.Len() >= b.BatchSizeThreshold { return b.Send() } - return nil + return []TableWithRange{}, nil } // Add addes a task to bather. 
-func (b *Batcher) Add(tbs TableWithRange) error { +func (b *Batcher) Add(tbs TableWithRange) ([]TableWithRange, error) { log.Info("adding table to batch", zap.Stringer("table", tbs.Table.Name), zap.Stringer("database", tbs.OldTable.Db.Name), @@ -128,12 +149,13 @@ func (b *Batcher) Add(tbs TableWithRange) error { zap.Int("batch size", b.Len()), ) b.currentBatch = append(b.currentBatch, tbs.Range...) + b.cachedTables = append(b.cachedTables, tbs) b.rewriteRules.Append(*tbs.RewriteRule) return b.sendIfFull() } // Close closes the batcher, sending all pending requests, close updateCh. -func (b *Batcher) Close() error { +func (b *Batcher) Close() ([]TableWithRange, error) { defer b.updateCh.Close() return b.Send() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index cc47a192d..5eea5df68 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -218,7 +218,16 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf batcher.BatchSizeThreshold = batchSize afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) - newTables, _, _, err := collectRestoreResults(ctx, afterRestoreStream, errCh) + // Checksum + updateCh = g.StartProgress( + ctx, "Checksum", int64(len(tables)), !cfg.LogProgress) + out := client.ValidateChecksum( + ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) + select { + case err = <-errCh: + case <-out: + log.Info("all works end.") + } // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers @@ -230,16 +239,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err != nil { return err } - - // Checksum - // TODO: add checksum to pipeline. - updateCh = g.StartProgress( - ctx, "Checksum", int64(len(newTables)), !cfg.LogProgress) - err = client.ValidateChecksum( - ctx, mgr.GetTiKV().GetClient(), tables, newTables, updateCh) - if err != nil { - return err - } + updateCh.Close() // Set task summary to success status. summary.SetSuccessStatus(true) @@ -429,7 +429,7 @@ func enableTiDBConfig() { // collectRestoreResults collectes result of pipelined restore process, // block the current goroutine, until all the tasks finished. -// TODO: remove this function when all the link +// TODO: remove this function when all the link is pipelined. func collectRestoreResults( ctx context.Context, ch <-chan restore.TableWithRange, @@ -458,17 +458,21 @@ func goRestore( batcher *restore.Batcher, errCh chan<- error, ) <-chan restore.TableWithRange { - outCh := make(chan restore.TableWithRange) + outCh := make(chan restore.TableWithRange, 8) go func() { // We cache old tables so that we can 'batch' recover TiFlash and tables. oldTables := []*utils.Table{} newTables := []*model.TableInfo{} defer func() { // when things done, we must clean pending requests. 
- if err := batcher.Close(); err != nil { + rem, err := batcher.Close() + if err != nil { errCh <- err return } + for _, t := range rem { + outCh <- t + } log.Info("doing postwork", zap.Int("new tables", len(newTables)), zap.Int("old tables", len(oldTables)), @@ -509,12 +513,14 @@ func goRestore( } newTables = append(newTables, t.Table) - if err := batcher.Add(t); err != nil { + sent, err := batcher.Add(t) + if err != nil { errCh <- err return } - - outCh <- t + for _, t := range sent { + outCh <- t + } } } }() From 93b594256b9db3f19c85cda66889e09c7698029b Mon Sep 17 00:00:00 2001 From: Hillium Date: Wed, 29 Apr 2020 17:04:16 +0800 Subject: [PATCH 08/52] restore, task: cancel parallel DDL request. --- pkg/restore/client.go | 66 +++++++++++++++++++---------------- pkg/restore/pipeline_items.go | 20 ++++++++--- pkg/restore/util_test.go | 16 ++++----- pkg/task/restore.go | 9 +++-- tests/br_full_index/workload | 2 +- 5 files changed, 68 insertions(+), 45 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index a576b9cfc..c1c7535b7 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -30,7 +30,6 @@ import ( "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" "go.uber.org/zap" - "golang.org/x/sync/errgroup" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials" @@ -326,6 +325,10 @@ func (rc *Client) CreateTables( } newTables := make([]*model.TableInfo, 0, len(tables)) errCh := make(chan error, 1) + tbMapping := map[string]int{} + for i, t := range tables { + tbMapping[t.Info.Name.String()] = i + } dataCh := rc.GoCreateTables(context.TODO(), dom, tables, newTS, errCh) for et := range dataCh { rules := et.RewriteRule @@ -333,6 +336,11 @@ func (rc *Client) CreateTables( rewriteRules.Data = append(rewriteRules.Data, rules.Data...) newTables = append(newTables, et.Table) } + // Let's ensure that the original order. + sort.Slice(newTables, func(i, j int) bool { + return tbMapping[newTables[i].Name.String()] < tbMapping[newTables[j].Name.String()] + }) + select { case err, ok := <-errCh: if ok { @@ -375,38 +383,36 @@ func (rc *Client) GoCreateTables( ) <-chan CreatedTable { // Could we have a smaller size of tables? 
outCh := make(chan CreatedTable, len(tables)) + createOneTable := func(t *utils.Table) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + rt, err := rc.createTable(dom, t, newTS) + if err != nil { + log.Error("create table failed", + zap.Error(err), + zap.Stringer("table", t.Info.Name), + zap.Stringer("database", t.Db.Name)) + return err + } + log.Debug("table created and send to next", + zap.Int("output chan size", len(outCh)), + zap.Stringer("table", t.Info.Name), + zap.Stringer("database", t.Db.Name)) + outCh <- rt + return nil + } go func() { defer close(outCh) defer log.Info("all tables created") - group, ectx := errgroup.WithContext(ctx) - for _, table := range tables { - t := table - group.Go(func() error { - select { - case <-ectx.Done(): - return ectx.Err() - default: - } - rt, err := rc.createTable(dom, t, newTS) - if err != nil { - log.Error("create table failed", - zap.Error(err), - zap.Stringer("table", t.Info.Name), - zap.Stringer("database", t.Db.Name)) - return err - } - log.Debug("table created and send to next", - zap.Int("output chan size", len(outCh)), - zap.Stringer("table", t.Info.Name), - zap.Stringer("database", t.Db.Name)) - outCh <- rt - return nil - }) - } - if err := group.Wait(); err != nil { - errCh <- err + if err := createOneTable(table); err != nil { + errCh <- err + return + } } }() return outCh @@ -716,9 +722,9 @@ func (rc *Client) switchTiKVMode(ctx context.Context, mode import_sstpb.SwitchMo return nil } -// ValidateChecksum validate checksum after restore +// GoValidateChecksum forks a goroutine to validate checksum after restore. // it returns a channel fires a struct{} when all things get done. -func (rc *Client) ValidateChecksum( +func (rc *Client) GoValidateChecksum( ctx context.Context, tableStream <-chan TableWithRange, kvClient kv.Client, diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index ad624f62d..63a833058 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -6,15 +6,16 @@ import ( "context" "fmt" - "github.com/pingcap/br/pkg/conn" - "github.com/pingcap/br/pkg/glue" - "github.com/pingcap/br/pkg/rtree" - "github.com/pingcap/br/pkg/utils" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/log" "github.com/pingcap/parser/model" "go.uber.org/zap" + + "github.com/pingcap/br/pkg/conn" + "github.com/pingcap/br/pkg/glue" + "github.com/pingcap/br/pkg/rtree" + "github.com/pingcap/br/pkg/utils" ) // CreatedTable is a table is created on restore process, @@ -45,6 +46,17 @@ type Batcher struct { BatchSizeThreshold int } +// Exhasut drains all remaining errors in the channel, into a slice of errors. +func Exhasut(ec <-chan error) []error { + out := make([]error, 0, len(ec)) + select { + case err := <-ec: + out = append(out, err) + default: + } + return out +} + // Len calculate the current size of this batcher. 
func (b *Batcher) Len() int { return len(b.currentBatch) diff --git a/pkg/restore/util_test.go b/pkg/restore/util_test.go index edab30c51..1d6f782e7 100644 --- a/pkg/restore/util_test.go +++ b/pkg/restore/util_test.go @@ -39,23 +39,23 @@ func (s *testRestoreUtilSuite) TestGetSSTMetaFromFile(c *C) { } func (s *testRestoreUtilSuite) TestMapTableToFiles(c *C) { - filesOfTable1 := []*backup.File { + filesOfTable1 := []*backup.File{ {Name: "table1-1.sst", StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1)}, + EndKey: tablecodec.EncodeTablePrefix(1)}, {Name: "table1-2.sst", StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1)}, - {Name: "table1-3/sst", + EndKey: tablecodec.EncodeTablePrefix(1)}, + {Name: "table1-3.sst", StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1)},} - filesOfTable2 := []*backup.File { + EndKey: tablecodec.EncodeTablePrefix(1)}} + filesOfTable2 := []*backup.File{ {Name: "table2-1.sst", StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2)}, + EndKey: tablecodec.EncodeTablePrefix(2)}, {Name: "table2-2.sst", StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2)}, + EndKey: tablecodec.EncodeTablePrefix(2)}, } result := MapTableToFiles(append(filesOfTable2, filesOfTable1...)) diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 5eea5df68..b7ac41dc6 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -13,6 +13,7 @@ import ( "github.com/pingcap/tidb-tools/pkg/filter" "github.com/pingcap/tidb/config" "github.com/spf13/pflag" + "go.uber.org/multierr" "go.uber.org/zap" "github.com/pingcap/br/pkg/conn" @@ -219,12 +220,16 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) // Checksum + // TODO: skip checksum when user specificated. + // TODO: allow checksum progress bar can appear together with each other. + // For now, we have to redirect one of them. updateCh = g.StartProgress( - ctx, "Checksum", int64(len(tables)), !cfg.LogProgress) - out := client.ValidateChecksum( + ctx, "Checksum", int64(len(tables)), true) + out := client.GoValidateChecksum( ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) select { case err = <-errCh: + err = multierr.Append(err, multierr.Combine(restore.Exhasut(errCh)...)) case <-out: log.Info("all works end.") } diff --git a/tests/br_full_index/workload b/tests/br_full_index/workload index 84335df96..448ca3c1a 100644 --- a/tests/br_full_index/workload +++ b/tests/br_full_index/workload @@ -1,4 +1,4 @@ -recordcount=100 +recordcount=1000 operationcount=0 workload=core From f3ec5ee15946488045da804f66bd44b1de7277a1 Mon Sep 17 00:00:00 2001 From: Hillium Date: Wed, 29 Apr 2020 20:13:35 +0800 Subject: [PATCH 09/52] restore: restore will now send batch periodly. 
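The batcher now flushes from a background goroutine: a send is triggered either
when the batch reaches its size threshold or once per second by a ticker. Below
is a minimal, standalone sketch of that ticker-plus-trigger loop; the names
flushLoop, trigger, and flush are illustrative for the sketch only and are not
the identifiers used in the diff.

    package main

    import (
        "context"
        "fmt"
        "time"
    )

    // flushLoop is an illustrative stand-in for the batcher's background
    // worker: it flushes when the size trigger fires or once per second,
    // whichever comes first.
    func flushLoop(ctx context.Context, trigger <-chan struct{}, flush func()) {
        tick := time.NewTicker(time.Second)
        defer tick.Stop()
        for {
            select {
            case <-ctx.Done():
                return
            case <-trigger: // batch reached its size threshold
                flush()
            case <-tick.C: // periodic flush of whatever has accumulated
                flush()
            }
        }
    }

    func main() {
        ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
        defer cancel()
        trigger := make(chan struct{}, 1)
        go flushLoop(ctx, trigger, func() { fmt.Println("flush batch") })
        trigger <- struct{}{} // simulate a full batch
        <-ctx.Done()
    }

In the actual Batcher below, the trigger is the mailbox channel written by
sendIfFull, and the flush is asyncSend, which forwards fully-restored tables to
the output channel.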
--- pkg/restore/pipeline_items.go | 97 +++++++++++++++++++++++++++-------- pkg/task/restore.go | 28 +++------- 2 files changed, 82 insertions(+), 43 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 63a833058..aa07a5ba2 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -5,6 +5,7 @@ package restore import ( "context" "fmt" + "time" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" @@ -18,6 +19,10 @@ import ( "github.com/pingcap/br/pkg/utils" ) +const ( + defaultBatcherOutputChannelSize = 1024 +) + // CreatedTable is a table is created on restore process, // but not yet filled by data. type CreatedTable struct { @@ -41,6 +46,9 @@ type Batcher struct { ctx context.Context client *Client + sender chan<- struct{} + sendErr chan<- error + outCh chan<- TableWithRange rejectStoreMap map[uint64]bool updateCh glue.Progress BatchSizeThreshold int @@ -63,11 +71,13 @@ func (b *Batcher) Len() int { } // NewBatcher creates a new batcher by client and updateCh. +// this batcher will work background, send batches per second, or batch size reaches limit. func NewBatcher( ctx context.Context, client *Client, updateCh glue.Progress, -) *Batcher { + errCh chan<- error, +) (*Batcher, <-chan TableWithRange) { tiflashStores, err := conn.GetAllTiKVStores(ctx, client.GetPDClient(), conn.TiFlashOnly) if err != nil { // After TiFlash support restore, we can remove this panic. @@ -78,15 +88,56 @@ func NewBatcher( for _, store := range tiflashStores { rejectStoreMap[store.GetId()] = true } - - return &Batcher{ + // use block channel here for forbid send table unexpectly. + mailbox := make(chan struct{}) + output := make(chan TableWithRange, defaultBatcherOutputChannelSize) + b := &Batcher{ rewriteRules: EmptyRewriteRule(), client: client, rejectStoreMap: rejectStoreMap, updateCh: updateCh, + sender: mailbox, + sendErr: errCh, + outCh: output, ctx: ctx, BatchSizeThreshold: 1, } + go b.workLoop(mailbox) + return b, output +} + +func (b *Batcher) workLoop(mailbox <-chan struct{}) { + tick := time.NewTicker(time.Second) + defer tick.Stop() + for { + select { + case <-b.ctx.Done(): + b.sendErr <- b.ctx.Err() + return + case _, ok := <-mailbox: + if !ok { + return + } + log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) + b.asyncSend() + case <-tick.C: + if b.Len() > 0 { + log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) + b.asyncSend() + } + } + } +} + +func (b *Batcher) asyncSend() { + tbls, err := b.Send() + if err != nil { + b.sendErr <- err + return + } + for _, t := range tbls { + b.outCh <- t + } } func (b *Batcher) splitPoint() int { @@ -99,6 +150,9 @@ func (b *Batcher) splitPoint() int { // drainSentTables drains the table just sent. // note that this function assumes you call it only after a sent of bench. +// WARN: we make a very strong assertion here: any time we will just 'split' at the last table. +// NOTE: if you meet a problem like 'failed to checksum' when everything is alright, check this. +// TODO: remove Batcher::currentBatch, collect currentBatch each time when call this. 
func (b *Batcher) drainSentTables() (drained []TableWithRange) { if b.Len() == 0 { drained, b.cachedTables = b.cachedTables, []TableWithRange{} @@ -116,10 +170,6 @@ func (b *Batcher) Send() ([]TableWithRange, error) { ranges, b.currentBatch = b.currentBatch[:b.splitPoint()], b.currentBatch[b.splitPoint():] tbs := b.drainSentTables() var tableNames []string - for _, t := range tbs { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Info("prepare split range by tables", zap.Strings("tables", tableNames)) if err := SplitRanges(b.ctx, b.client, ranges, b.rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", @@ -133,27 +183,31 @@ func (b *Batcher) Send() ([]TableWithRange, error) { for _, fs := range ranges { files = append(files, fs.Files...) } - log.Info("send batch", - zap.Int("range count", len(ranges)), - zap.Int("file count", len(files)), - ) + + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Debug("split range by tables done", zap.Strings("tables", tableNames)) + if err := b.client.RestoreFiles(files, b.rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { return nil, err } - + log.Debug("send batch done", + zap.Int("range count", len(ranges)), + zap.Int("file count", len(files)), + ) return tbs, nil } -func (b *Batcher) sendIfFull() ([]TableWithRange, error) { +func (b *Batcher) sendIfFull() { if b.Len() >= b.BatchSizeThreshold { - return b.Send() + b.sender <- struct{}{} } - return []TableWithRange{}, nil } // Add addes a task to bather. -func (b *Batcher) Add(tbs TableWithRange) ([]TableWithRange, error) { - log.Info("adding table to batch", +func (b *Batcher) Add(tbs TableWithRange) { + log.Debug("adding table to batch", zap.Stringer("table", tbs.Table.Name), zap.Stringer("database", tbs.OldTable.Db.Name), zap.Int64("old id", tbs.OldTable.Info.ID), @@ -163,11 +217,12 @@ func (b *Batcher) Add(tbs TableWithRange) ([]TableWithRange, error) { b.currentBatch = append(b.currentBatch, tbs.Range...) b.cachedTables = append(b.cachedTables, tbs) b.rewriteRules.Append(*tbs.RewriteRule) - return b.sendIfFull() } // Close closes the batcher, sending all pending requests, close updateCh. -func (b *Batcher) Close() ([]TableWithRange, error) { - defer b.updateCh.Close() - return b.Send() +func (b *Batcher) Close() { + b.asyncSend() + close(b.outCh) + close(b.sender) + b.updateCh.Close() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index b7ac41dc6..59df07764 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -215,9 +215,9 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Restore sst files in batch. batchSize := utils.MinInt(int(cfg.Concurrency), maxRestoreBatchSizeLimit) - batcher := restore.NewBatcher(ctx, client, updateCh) + batcher, afterRestoreStream := restore.NewBatcher(ctx, client, updateCh, errCh) batcher.BatchSizeThreshold = batchSize - afterRestoreStream := goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) + goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) // Checksum // TODO: skip checksum when user specificated. @@ -462,22 +462,14 @@ func goRestore( client *restore.Client, batcher *restore.Batcher, errCh chan<- error, -) <-chan restore.TableWithRange { - outCh := make(chan restore.TableWithRange, 8) +) { go func() { // We cache old tables so that we can 'batch' recover TiFlash and tables. 
oldTables := []*utils.Table{} newTables := []*model.TableInfo{} defer func() { // when things done, we must clean pending requests. - rem, err := batcher.Close() - if err != nil { - errCh <- err - return - } - for _, t := range rem { - outCh <- t - } + batcher.Close() log.Info("doing postwork", zap.Int("new tables", len(newTables)), zap.Int("old tables", len(oldTables)), @@ -490,13 +482,13 @@ func goRestore( log.Error("failed on recover TiFlash replicas", zap.Error(err)) errCh <- err } - close(outCh) }() for { select { case <-ctx.Done(): errCh <- ctx.Err() + return case t, ok := <-inputCh: if !ok { return @@ -518,16 +510,8 @@ func goRestore( } newTables = append(newTables, t.Table) - sent, err := batcher.Add(t) - if err != nil { - errCh <- err - return - } - for _, t := range sent { - outCh <- t - } + batcher.Add(t) } } }() - return outCh } From bb31ec04035370891fd305bfa61e097fda78c5ce Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 8 May 2020 14:50:36 +0800 Subject: [PATCH 10/52] restore: refactor batcher. --- .gitignore | 1 + pkg/restore/batcher_test.go | 1 + pkg/restore/client.go | 4 +- pkg/restore/pipeline_items.go | 196 ++++++++++++++++++++++------------ pkg/task/restore.go | 6 +- 5 files changed, 138 insertions(+), 70 deletions(-) create mode 100644 pkg/restore/batcher_test.go diff --git a/.gitignore b/.gitignore index cf88895a3..66fe8f9aa 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ coverage.txt docker/data/ docker/logs/ *.swp +.DS_Store diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go new file mode 100644 index 000000000..9b70a15f0 --- /dev/null +++ b/pkg/restore/batcher_test.go @@ -0,0 +1 @@ +package restore diff --git a/pkg/restore/client.go b/pkg/restore/client.go index c1c7535b7..a5487aca3 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -726,7 +726,7 @@ func (rc *Client) switchTiKVMode(ctx context.Context, mode import_sstpb.SwitchMo // it returns a channel fires a struct{} when all things get done. func (rc *Client) GoValidateChecksum( ctx context.Context, - tableStream <-chan TableWithRange, + tableStream <-chan CreatedTable, kvClient kv.Client, errCh chan<- error, updateCh glue.Progress, @@ -755,7 +755,7 @@ func (rc *Client) GoValidateChecksum( return } workers.Apply(func() { - err := rc.execChecksum(ctx, tbl.CreatedTable, kvClient) + err := rc.execChecksum(ctx, tbl, kvClient) if err != nil { errCh <- err } diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index aa07a5ba2..5ddec4e49 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -40,17 +40,14 @@ type TableWithRange struct { // Batcher collectes ranges to restore and send batching split/ingest request. type Batcher struct { - currentBatch []rtree.Range cachedTables []TableWithRange rewriteRules *RewriteRules ctx context.Context - client *Client - sender chan<- struct{} + sendTrigger chan<- struct{} sendErr chan<- error - outCh chan<- TableWithRange - rejectStoreMap map[uint64]bool - updateCh glue.Progress + outCh chan<- CreatedTable + sender BatchSender BatchSizeThreshold int } @@ -67,38 +64,101 @@ func Exhasut(ec <-chan error) []error { // Len calculate the current size of this batcher. func (b *Batcher) Len() int { - return len(b.currentBatch) + result := 0 + for _, tbl := range b.cachedTables { + result += len(tbl.Range) + } + return result } -// NewBatcher creates a new batcher by client and updateCh. -// this batcher will work background, send batches per second, or batch size reaches limit. 
-func NewBatcher( - ctx context.Context, - client *Client, - updateCh glue.Progress, - errCh chan<- error, -) (*Batcher, <-chan TableWithRange) { - tiflashStores, err := conn.GetAllTiKVStores(ctx, client.GetPDClient(), conn.TiFlashOnly) +// BatchSender is the abstract of how the batcher send a batch. +type BatchSender interface { + // RestoreBatch will backup all ranges and tables + RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error + Close() +} + +type tikvSender struct { + client *Client + updateCh glue.Progress + ctx context.Context + rejectStoreMap map[uint64]bool +} + +// NewTiKVSender make a sender that send restore requests to TiKV. +func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (BatchSender, error) { + tiflashStores, err := conn.GetAllTiKVStores(ctx, cli.GetPDClient(), conn.TiFlashOnly) if err != nil { // After TiFlash support restore, we can remove this panic. // The origin of this panic is at RunRestore, and its semantic is nearing panic, don't worry about it. - log.Panic("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) + log.Error("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) + return nil, err } rejectStoreMap := make(map[uint64]bool) for _, store := range tiflashStores { rejectStoreMap[store.GetId()] = true } + + return &tikvSender{ + client: cli, + updateCh: updateCh, + ctx: ctx, + rejectStoreMap: rejectStoreMap, + }, nil +} + +func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { + if err := SplitRanges(b.ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { + log.Error("failed on split range", + zap.Any("ranges", ranges), + zap.Error(err), + ) + return err + } + + files := []*backup.File{} + for _, fs := range ranges { + files = append(files, fs.Files...) + } + + var tableNames []string + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Debug("split range by tables done", zap.Strings("tables", tableNames)) + + if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { + return err + } + log.Debug("send batch done", + zap.Int("range count", len(ranges)), + zap.Int("file count", len(files)), + ) + + return nil +} + +func (b *tikvSender) Close() { + b.updateCh.Close() +} + +// NewBatcher creates a new batcher by client and updateCh. +// this batcher will work background, send batches per second, or batch size reaches limit. +// and it will emit full-restored tables to the output channel returned. +func NewBatcher( + ctx context.Context, + sender BatchSender, + errCh chan<- error, +) (*Batcher, <-chan CreatedTable) { // use block channel here for forbid send table unexpectly. mailbox := make(chan struct{}) - output := make(chan TableWithRange, defaultBatcherOutputChannelSize) + output := make(chan CreatedTable, defaultBatcherOutputChannelSize) b := &Batcher{ rewriteRules: EmptyRewriteRule(), - client: client, - rejectStoreMap: rejectStoreMap, - updateCh: updateCh, - sender: mailbox, + sendTrigger: mailbox, sendErr: errCh, outCh: output, + sender: sender, ctx: ctx, BatchSizeThreshold: 1, } @@ -148,81 +208,83 @@ func (b *Batcher) splitPoint() int { return splitPoint } -// drainSentTables drains the table just sent. -// note that this function assumes you call it only after a sent of bench. 
-// WARN: we make a very strong assertion here: any time we will just 'split' at the last table. -// NOTE: if you meet a problem like 'failed to checksum' when everything is alright, check this. -// TODO: remove Batcher::currentBatch, collect currentBatch each time when call this. -func (b *Batcher) drainSentTables() (drained []TableWithRange) { - if b.Len() == 0 { - drained, b.cachedTables = b.cachedTables, []TableWithRange{} - return - } - cachedLen := len(b.cachedTables) - drained, b.cachedTables = b.cachedTables[:cachedLen-1], b.cachedTables[cachedLen-1:] - return -} +func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTable) { + for offset, thisTable := range b.cachedTables { + thisTableLen := len(thisTable.Range) + collected := len(ranges) -// Send sends all pending requests in the batcher. -// returns tables sent in the current batch. -func (b *Batcher) Send() ([]TableWithRange, error) { - var ranges []rtree.Range - ranges, b.currentBatch = b.currentBatch[:b.splitPoint()], b.currentBatch[b.splitPoint():] - tbs := b.drainSentTables() - var tableNames []string + // the batch is full, we should stop here! + // we use strictly greater than because when we send a batch at equal, the offset should plus one. + // (because the last table is sent, we should put it in emptyTables), and this will intrduce extra complex. + if thisTableLen+collected > b.BatchSizeThreshold { + drainSize := b.BatchSizeThreshold - collected + thisTableRanges := thisTable.Range - if err := SplitRanges(b.ctx, b.client, ranges, b.rewriteRules, b.updateCh); err != nil { - log.Error("failed on split range", - zap.Any("ranges", ranges), - zap.Error(err), - ) - return nil, err - } + var drained []rtree.Range + drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:] + log.Debug("draining partial table to batch", + zap.Stringer("table", thisTable.Table.Name), + zap.Stringer("database", thisTable.OldTable.Db.Name), + zap.Int("size", thisTableLen), + zap.Int("drained", drainSize), + ) + ranges = append(ranges, drained...) + b.cachedTables = b.cachedTables[offset:] + return + } - files := []*backup.File{} - for _, fs := range ranges { - files = append(files, fs.Files...) + emptyTables = append(emptyTables, b.cachedTables[offset].CreatedTable) + // let 'drain' the ranges of current table. This op must not make the batch full. + ranges = append(ranges, b.cachedTables[offset].Range...) + // clear the table + b.cachedTables[offset].Range = []rtree.Range{} + log.Debug("draining table to batch", + zap.Stringer("table", thisTable.Table.Name), + zap.Stringer("database", thisTable.OldTable.Db.Name), + zap.Int("size", thisTableLen), + ) } - for _, t := range tbs { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Debug("split range by tables done", zap.Strings("tables", tableNames)) + // all tables are drained. + b.cachedTables = []TableWithRange{} + return +} - if err := b.client.RestoreFiles(files, b.rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { +// Send sends all pending requests in the batcher. +// returns tables sent in the current batch. 
+func (b *Batcher) Send() ([]CreatedTable, error) { + ranges, tbs := b.drainRanges() + if err := b.sender.RestoreBatch(ranges, b.rewriteRules, tbs); err != nil { return nil, err } - log.Debug("send batch done", - zap.Int("range count", len(ranges)), - zap.Int("file count", len(files)), - ) return tbs, nil } func (b *Batcher) sendIfFull() { if b.Len() >= b.BatchSizeThreshold { - b.sender <- struct{}{} + b.sendTrigger <- struct{}{} } } -// Add addes a task to bather. +// Add addes a task to batcher. func (b *Batcher) Add(tbs TableWithRange) { log.Debug("adding table to batch", zap.Stringer("table", tbs.Table.Name), zap.Stringer("database", tbs.OldTable.Db.Name), zap.Int64("old id", tbs.OldTable.Info.ID), zap.Int64("new id", tbs.Table.ID), + zap.Int("table size", len(tbs.Range)), zap.Int("batch size", b.Len()), ) - b.currentBatch = append(b.currentBatch, tbs.Range...) b.cachedTables = append(b.cachedTables, tbs) b.rewriteRules.Append(*tbs.RewriteRule) } // Close closes the batcher, sending all pending requests, close updateCh. func (b *Batcher) Close() { + log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) b.asyncSend() close(b.outCh) - close(b.sender) - b.updateCh.Close() + close(b.sendTrigger) + b.sender.Close() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 59df07764..b5a04de00 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -215,7 +215,11 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Restore sst files in batch. batchSize := utils.MinInt(int(cfg.Concurrency), maxRestoreBatchSizeLimit) - batcher, afterRestoreStream := restore.NewBatcher(ctx, client, updateCh, errCh) + sender, err := restore.NewTiKVSender(ctx, client, updateCh) + if err != nil { + return err + } + batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, errCh) batcher.BatchSizeThreshold = batchSize goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) From d54b99c86a8438962e65a5de4fa47c6758545706 Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 8 May 2020 16:59:05 +0800 Subject: [PATCH 11/52] restore: add tests on batcher. --- pkg/restore/batcher_test.go | 213 ++++++++++++++++++++++++++++++++++ pkg/restore/pipeline_items.go | 52 +++++---- 2 files changed, 241 insertions(+), 24 deletions(-) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 9b70a15f0..92003dcdc 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -1 +1,214 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + package restore + +import ( + "context" + "time" + + "github.com/pingcap/br/pkg/rtree" + "github.com/pingcap/br/pkg/utils" + . 
"github.com/pingcap/check" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/parser/model" + "go.uber.org/zap" +) + +type testBatcherSuite struct{} + +type drySender struct { + tbls chan CreatedTable + ranges chan rtree.Range + nBatch int +} + +func (d *drySender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { + d.nBatch++ + for _, tbl := range tbs { + log.Info("dry restore", zap.Int64("table ID", tbl.Table.ID)) + d.tbls <- tbl + } + for _, rng := range ranges { + d.ranges <- rng + } + return nil +} + +func (d *drySender) Close() { + close(d.tbls) + close(d.ranges) +} + +func (d *drySender) exhaust() (tbls []CreatedTable, rngs []rtree.Range) { + for tbl := range d.tbls { + tbls = append(tbls, tbl) + } + for rng := range d.ranges { + rngs = append(rngs, rng) + } + return +} + +func newDrySender() *drySender { + return &drySender{ + tbls: make(chan CreatedTable, 4096), + ranges: make(chan rtree.Range, 4096), + } +} + +func (d *drySender) RangeLen() int { + return len(d.ranges) +} + +func (d *drySender) TableLen() int { + return len(d.tbls) +} + +func (d *drySender) BatchCount() int { + return d.nBatch +} + +var ( + _ = Suite(&testBatcherSuite{}) +) + +func fakeTableWithRange(id int64, rngs []rtree.Range) TableWithRange { + tbl := &utils.Table{ + Db: &model.DBInfo{}, + Info: &model.TableInfo{ + ID: id, + }, + } + tblWithRng := TableWithRange{ + CreatedTable: CreatedTable{ + RewriteRule: EmptyRewriteRule(), + Table: tbl.Info, + OldTable: tbl, + }, + Range: rngs, + } + return tblWithRng +} + +func fakeRange(startKey, endKey string) rtree.Range { + return rtree.Range{ + StartKey: []byte(startKey), + EndKey: []byte(endKey), + } +} + +// TestBasic tests basic workflow of batcher. +func (*testBatcherSuite) TestBasic(c *C) { + errCh := make(chan error, 8) + sender := newDrySender() + batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher.BatchSizeThreshold = 2 + + simpleTables := []TableWithRange{ + fakeTableWithRange(1, []rtree.Range{fakeRange("aaa", "aab")}), + fakeTableWithRange(2, []rtree.Range{fakeRange("baa", "bab"), fakeRange("bac", "bad")}), + fakeTableWithRange(3, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}), + } + + for _, tbl := range simpleTables { + batcher.Add(tbl) + } + + batcher.Close() + tbls, rngs := sender.exhaust() + totalRngs := []rtree.Range{} + + c.Assert(len(tbls), Equals, len(simpleTables)) + for i, tbl := range simpleTables { + c.Assert(tbls[i], DeepEquals, tbl.CreatedTable) + totalRngs = append(totalRngs, tbl.Range...) + } + + c.Assert(totalRngs, DeepEquals, rngs) + select { + case err := <-errCh: + c.Fatal(errors.Trace(err)) + default: + } +} + +func (*testBatcherSuite) TestAutoSend(c *C) { + errCh := make(chan error, 8) + sender := newDrySender() + batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher.BatchSizeThreshold = 1024 + + simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) + + batcher.Add(simpleTable) + // wait until auto send. 
+ time.Sleep(1300 * time.Millisecond) + c.Assert(sender.RangeLen(), Greater, 0) + c.Assert(sender.TableLen(), Greater, 0) + c.Assert(batcher.Len(), Equals, 0) + + batcher.Close() + + tbls, rngs := sender.exhaust() + c.Assert(len(tbls), Greater, 0) + c.Assert(rngs, DeepEquals, simpleTable.Range) + c.Assert(tbls[0], DeepEquals, simpleTable.CreatedTable) + select { + case err := <-errCh: + c.Fatal(errors.Trace(err)) + default: + } +} + +func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { + errCh := make(chan error, 8) + sender := newDrySender() + batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher.BatchSizeThreshold = 2 + + simpleTable := fakeTableWithRange(1, []rtree.Range{ + fakeRange("caa", "cab"), fakeRange("cac", "cad"), + fakeRange("cae", "caf"), fakeRange("cag", "cai"), + fakeRange("caj", "cak"), fakeRange("cal", "cam"), + fakeRange("can", "cao"), fakeRange("cap", "caq")}) + + batcher.Add(simpleTable) + c.Assert(sender.BatchCount(), Equals, 4) + + batcher.Close() + + tbls, rngs := sender.exhaust() + c.Assert(len(tbls), Greater, 0) + c.Assert(rngs, DeepEquals, simpleTable.Range) + c.Assert(tbls[0], DeepEquals, simpleTable.CreatedTable) + select { + case err := <-errCh: + c.Fatal(errors.Trace(err)) + default: + } +} + +func (*testBatcherSuite) TestBatcherLen(c *C) { + errCh := make(chan error, 8) + sender := newDrySender() + batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher.BatchSizeThreshold = 1024 + + simpleTable := fakeTableWithRange(1, []rtree.Range{ + fakeRange("caa", "cab"), fakeRange("cac", "cad"), + fakeRange("cae", "caf"), fakeRange("cag", "cai"), + fakeRange("caj", "cak"), fakeRange("cal", "cam"), + fakeRange("can", "cao"), fakeRange("cap", "caq")}) + + batcher.Add(simpleTable) + c.Assert(batcher.Len(), Equals, 8) + batcher.Close() + c.Assert(batcher.Len(), Equals, 0) + + select { + case err := <-errCh: + c.Fatal(errors.Trace(err)) + default: + } +} diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 5ddec4e49..bc4a9a8da 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -5,6 +5,8 @@ package restore import ( "context" "fmt" + "sync" + "sync/atomic" "time" "github.com/pingcap/errors" @@ -40,15 +42,16 @@ type TableWithRange struct { // Batcher collectes ranges to restore and send batching split/ingest request. type Batcher struct { - cachedTables []TableWithRange - rewriteRules *RewriteRules + cachedTables []TableWithRange + cachedTablesMu *sync.Mutex + rewriteRules *RewriteRules ctx context.Context - sendTrigger chan<- struct{} sendErr chan<- error outCh chan<- CreatedTable sender BatchSender BatchSizeThreshold int + size int32 } // Exhasut drains all remaining errors in the channel, into a slice of errors. @@ -64,11 +67,7 @@ func Exhasut(ec <-chan error) []error { // Len calculate the current size of this batcher. func (b *Batcher) Len() int { - result := 0 - for _, tbl := range b.cachedTables { - result += len(tbl.Range) - } - return result + return int(atomic.LoadInt32(&b.size)) } // BatchSender is the abstract of how the batcher send a batch. @@ -150,23 +149,21 @@ func NewBatcher( sender BatchSender, errCh chan<- error, ) (*Batcher, <-chan CreatedTable) { - // use block channel here for forbid send table unexpectly. 
- mailbox := make(chan struct{}) output := make(chan CreatedTable, defaultBatcherOutputChannelSize) b := &Batcher{ rewriteRules: EmptyRewriteRule(), - sendTrigger: mailbox, sendErr: errCh, outCh: output, sender: sender, ctx: ctx, + cachedTablesMu: new(sync.Mutex), BatchSizeThreshold: 1, } - go b.workLoop(mailbox) + go b.workLoop() return b, output } -func (b *Batcher) workLoop(mailbox <-chan struct{}) { +func (b *Batcher) workLoop() { tick := time.NewTicker(time.Second) defer tick.Stop() for { @@ -174,12 +171,6 @@ func (b *Batcher) workLoop(mailbox <-chan struct{}) { case <-b.ctx.Done(): b.sendErr <- b.ctx.Err() return - case _, ok := <-mailbox: - if !ok { - return - } - log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend() case <-tick.C: if b.Len() > 0 { log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) @@ -209,6 +200,9 @@ func (b *Batcher) splitPoint() int { } func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTable) { + b.cachedTablesMu.Lock() + defer b.cachedTablesMu.Unlock() + for offset, thisTable := range b.cachedTables { thisTableLen := len(thisTable.Range) collected := len(ranges) @@ -230,13 +224,14 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl ) ranges = append(ranges, drained...) b.cachedTables = b.cachedTables[offset:] + atomic.AddInt32(&b.size, -int32(len(ranges))) return } emptyTables = append(emptyTables, b.cachedTables[offset].CreatedTable) // let 'drain' the ranges of current table. This op must not make the batch full. ranges = append(ranges, b.cachedTables[offset].Range...) - // clear the table + // clear the table length. b.cachedTables[offset].Range = []rtree.Range{} log.Debug("draining table to batch", zap.Stringer("table", thisTable.Table.Name), @@ -247,6 +242,7 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // all tables are drained. b.cachedTables = []TableWithRange{} + atomic.AddInt32(&b.size, -int32(len(ranges))) return } @@ -261,13 +257,16 @@ func (b *Batcher) Send() ([]CreatedTable, error) { } func (b *Batcher) sendIfFull() { - if b.Len() >= b.BatchSizeThreshold { - b.sendTrigger <- struct{}{} + // never collect the send batch request message. + for b.Len() >= b.BatchSizeThreshold { + log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) + b.asyncSend() } } // Add addes a task to batcher. func (b *Batcher) Add(tbs TableWithRange) { + b.cachedTablesMu.Lock() log.Debug("adding table to batch", zap.Stringer("table", tbs.Table.Name), zap.Stringer("database", tbs.OldTable.Db.Name), @@ -278,13 +277,18 @@ func (b *Batcher) Add(tbs TableWithRange) { ) b.cachedTables = append(b.cachedTables, tbs) b.rewriteRules.Append(*tbs.RewriteRule) + atomic.AddInt32(&b.size, int32(len(tbs.Range))) + b.cachedTablesMu.Unlock() + + b.sendIfFull() } // Close closes the batcher, sending all pending requests, close updateCh. func (b *Batcher) Close() { log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) - b.asyncSend() + for b.Len() > 0 { + b.asyncSend() + } close(b.outCh) - close(b.sendTrigger) b.sender.Close() } From 4c4a3d8c91bf789d23c47f53c3f1009306114a87 Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 8 May 2020 17:05:11 +0800 Subject: [PATCH 12/52] restore, task: make linter happy. 
--- go.mod | 1 - pkg/restore/batcher_test.go | 5 +++-- pkg/restore/pipeline_items.go | 8 -------- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 849a29148..986893afe 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,6 @@ require ( go.opencensus.io v0.22.2 // indirect go.uber.org/zap v1.14.1 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 - golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e google.golang.org/api v0.14.0 google.golang.org/grpc v1.25.1 ) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 92003dcdc..14953246a 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -6,13 +6,14 @@ import ( "context" "time" - "github.com/pingcap/br/pkg/rtree" - "github.com/pingcap/br/pkg/utils" . "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/pingcap/parser/model" "go.uber.org/zap" + + "github.com/pingcap/br/pkg/rtree" + "github.com/pingcap/br/pkg/utils" ) type testBatcherSuite struct{} diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index bc4a9a8da..7d7b0a1d4 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -191,14 +191,6 @@ func (b *Batcher) asyncSend() { } } -func (b *Batcher) splitPoint() int { - splitPoint := b.BatchSizeThreshold - if splitPoint > b.Len() { - return b.Len() - } - return splitPoint -} - func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTable) { b.cachedTablesMu.Lock() defer b.cachedTablesMu.Unlock() From 3cffa5d7d32300217d6307957dd308e8bb24c562 Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 8 May 2020 17:31:52 +0800 Subject: [PATCH 13/52] *: add dep to multierr. --- go.mod | 1 + 1 file changed, 1 insertion(+) diff --git a/go.mod b/go.mod index 986893afe..cfdfc1939 100644 --- a/go.mod +++ b/go.mod @@ -35,6 +35,7 @@ require ( github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 // indirect go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 go.opencensus.io v0.22.2 // indirect + go.uber.org/multierr v1.5.0 go.uber.org/zap v1.14.1 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 google.golang.org/api v0.14.0 From 59e33b0ab612d9135494390111b69c750156c7a3 Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 8 May 2020 18:09:28 +0800 Subject: [PATCH 14/52] task: adjust to new function sig. --- pkg/task/restore.go | 11 +-- tests/config/tidb.toml | 162 ++--------------------------------------- 2 files changed, 6 insertions(+), 167 deletions(-) diff --git a/pkg/task/restore.go b/pkg/task/restore.go index d993224c0..a9e875818 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -258,12 +258,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf log.Info("all works end.") } - // Always run the post-work even on error, so we don't stuck in the import - // mode or emptied schedulers - if errRestorePostWork := restorePostWork(ctx, client, mgr, clusterCfg); err == nil { - err = errRestorePostWork - } - // If any error happened, return now, don't execute checksum. 
if err != nil { return err @@ -589,10 +583,7 @@ func goRestore( zap.Int("new tables", len(newTables)), zap.Int("old tables", len(oldTables)), ) - if err := splitPostWork(ctx, client, newTables); err != nil { - log.Error("failed on unset online restore placement rules", zap.Error(err)) - errCh <- err - } + splitPostWork(ctx, client, newTables) if err := client.RecoverTiFlashReplica(oldTables); err != nil { log.Error("failed on recover TiFlash replicas", zap.Error(err)) errCh <- err diff --git a/tests/config/tidb.toml b/tests/config/tidb.toml index b432842b1..b4a2289f1 100644 --- a/tests/config/tidb.toml +++ b/tests/config/tidb.toml @@ -1,158 +1,6 @@ -host = "0.0.0.0" -advertise-address = "0.0.0.0" -port = 4000 -cors = "" -store = "tikv" -path = "127.0.0.1:2379" -socket = "" -lease = "360s" -run-ddl = true -split-table = true -token-limit = 1000 -oom-use-tmp-storage = true -tmp-storage-path = "/var/folders/7g/f3yqsrhn5m75k7h_810s96_m0000gn/T/tidb/tmp-storage" -oom-action = "cancel" -mem-quota-query = 1073741824 -enable-streaming = false -enable-batch-dml = false -lower-case-table-names = 2 -server-version = "" -compatible-kill-query = false -check-mb4-value-in-utf8 = true -max-index-length = 3072 -alter-primary-key = false -treat-old-version-utf8-as-utf8mb4 = true -enable-table-lock = false -delay-clean-table-lock = 0 -split-region-max-num = 1000 -repair-mode = false -repair-table-list = [] -max-server-connections = 4096 -new_collations_enabled_on_first_bootstrap = false -enable-dynamic-config = true +# config of tidb -[txn-local-latches] - enabled = false - capacity = 2048000 - -[log] - level = "info" - format = "text" - disable-timestamp = "" - enable-timestamp = "" - disable-error-stack = "" - enable-error-stack = "" - enable-slow-log = true - slow-query-file = "tidb-slow.log" - slow-threshold = 300 - expensive-threshold = 10000 - query-log-max-len = 4096 - record-plan-in-slow-log = 1 - [log.file] - filename = "/tmp/backup_restore_test/tidb.log" - max-size = 300 - max-days = 0 - max-backups = 0 - -[security] - skip-grant-table = false - ssl-ca = "" - ssl-cert = "" - ssl-key = "" - require-secure-transport = false - cluster-ssl-ca = "" - cluster-ssl-cert = "" - cluster-ssl-key = "" - -[status] - status-host = "0.0.0.0" - metrics-addr = "" - status-port = 10080 - metrics-interval = 15 - report-status = true - record-db-qps = false - -[performance] - max-procs = 0 - max-memory = 0 - stats-lease = "3s" - stmt-count-limit = 5000 - feedback-probability = 0.05 - query-feedback-limit = 1024 - pseudo-estimate-ratio = 0.8 - force-priority = "NO_PRIORITY" - bind-info-lease = "3s" - txn-total-size-limit = 104857600 - tcp-keep-alive = true - cross-join = true - run-auto-analyze = true - -[prepared-plan-cache] - enabled = false - capacity = 100 - memory-guard-ratio = 0.1 - -[opentracing] - enable = false - rpc-metrics = false - [opentracing.sampler] - type = "const" - param = 1.0 - sampling-server-url = "" - max-operations = 0 - sampling-refresh-interval = 0 - [opentracing.reporter] - queue-size = 0 - buffer-flush-interval = 0 - log-spans = false - local-agent-host-port = "" - -[proxy-protocol] - networks = "" - header-timeout = 5 - -[tikv-client] - grpc-connection-count = 4 - grpc-keepalive-time = 10 - grpc-keepalive-timeout = 3 - commit-timeout = "41s" - max-batch-size = 128 - overload-threshold = 200 - max-batch-wait-time = 0 - batch-wait-size = 8 - enable-chunk-rpc = true - region-cache-ttl = 600 - store-limit = 0 - [tikv-client.copr-cache] - enabled = false - capacity-mb = 0.0 - 
admission-max-result-mb = 0.0 - admission-min-process-ms = 0 - -[binlog] - enable = false - ignore-error = false - write-timeout = "15s" - binlog-socket = "" - strategy = "range" - -[plugin] - dir = "" - load = "" - -[pessimistic-txn] - enable = true - max-retry-count = 256 - -[stmt-summary] - enable = true - max-stmt-count = 200 - max-sql-length = 4096 - refresh-interval = 1800 - history-size = 24 - -[isolation-read] - engines = ["tikv", "tiflash", "tidb"] - -[experimental] - allow-auto-random = false +# Schema lease duration +# There are lot of ddl in the tests, setting this +# to 360s to test whther BR is gracefully shutdown. +lease = "360s" \ No newline at end of file From 44d52be793c4c4277cbedf662b9ee5ee8e72bbc3 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sat, 9 May 2020 10:36:44 +0800 Subject: [PATCH 15/52] task, restore: close updateCh until all task finish. --- pkg/restore/client.go | 5 ++++- pkg/task/restore.go | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index a5487aca3..98a1f48b3 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -746,20 +746,24 @@ func (rc *Client) GoValidateChecksum( outCh <- struct{}{} close(outCh) }() + wg := new(sync.WaitGroup) for { select { case <-ctx.Done(): errCh <- ctx.Err() case tbl, ok := <-tableStream: if !ok { + wg.Wait() return } + wg.Add(1) workers.Apply(func() { err := rc.execChecksum(ctx, tbl, kvClient) if err != nil { errCh <- err } updateCh.Inc() + wg.Done() }) } } @@ -783,7 +787,6 @@ func (rc *Client) execChecksum(ctx context.Context, tbl CreatedTable, kvClient k }) if err != nil { return errors.Trace(err) - } table := tbl.OldTable diff --git a/pkg/task/restore.go b/pkg/task/restore.go index a9e875818..bcc1f449d 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -249,6 +249,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // For now, we have to redirect one of them. updateCh = g.StartProgress( ctx, "Checksum", int64(len(tables)), true) + defer updateCh.Close() out := client.GoValidateChecksum( ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) select { @@ -262,7 +263,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err != nil { return err } - updateCh.Close() // Set task summary to success status. summary.SetSuccessStatus(true) From a49e1f03c5124772e7b24d7866055786fea5fb95 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sat, 9 May 2020 12:27:07 +0800 Subject: [PATCH 16/52] task, restore: pipelined restore supports parition. --- pkg/restore/pipeline_items.go | 18 ++++++++++++++---- pkg/restore/util.go | 10 ++++++++++ pkg/task/restore.go | 4 ++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 7d7b0a1d4..8c3729cab 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -138,7 +138,9 @@ func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRul } func (b *tikvSender) Close() { - b.updateCh.Close() + // Instead of close update channel here, we close it when main function ends execute. + // Because when Close called, there may be some pending work at import worker. + // If possiable, it would be better to move close operation to sender side(e.g. func RestoreFiles). } // NewBatcher creates a new batcher by client and updateCh. 
@@ -220,9 +222,9 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl return } - emptyTables = append(emptyTables, b.cachedTables[offset].CreatedTable) - // let 'drain' the ranges of current table. This op must not make the batch full. - ranges = append(ranges, b.cachedTables[offset].Range...) + emptyTables = append(emptyTables, thisTable.CreatedTable) + // let's 'drain' the ranges of current table. This op must not make the batch full. + ranges = append(ranges, thisTable.Range...) // clear the table length. b.cachedTables[offset].Range = []rtree.Range{} log.Debug("draining table to batch", @@ -242,6 +244,14 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // returns tables sent in the current batch. func (b *Batcher) Send() ([]CreatedTable, error) { ranges, tbs := b.drainRanges() + var tableNames []string + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Debug("do batch send", + zap.Strings("tables", tableNames), + zap.Int("ranges", len(ranges)), + ) if err := b.sender.RestoreBatch(ranges, b.rewriteRules, tbs); err != nil { return nil, err } diff --git a/pkg/restore/util.go b/pkg/restore/util.go index a2611bc09..c089661ed 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -202,6 +202,16 @@ func GoValidateFileRanges( return } files := fileOfTable[t.OldTable.Info.ID] + if partitions := t.OldTable.Info.Partition; partitions != nil { + log.Debug("table partition", + zap.Stringer("database", t.OldTable.Db.Name), + zap.Stringer("table", t.Table.Name), + zap.Any("partition info", partitions), + ) + for _, partition := range partitions.Definitions { + files = append(files, fileOfTable[partition.ID]...) + } + } ranges, err := ValidateFileRanges(files, t.RewriteRule) if err != nil { errCh <- err diff --git a/pkg/task/restore.go b/pkg/task/restore.go index bcc1f449d..62b2387c0 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -195,10 +195,13 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } tableFileMap := restore.MapTableToFiles(files) + log.Debug("mapped table to files.", zap.Any("result map", tableFileMap)) + rangeStream := restore.GoValidateFileRanges(ctx, tableStream, tableFileMap, errCh) rangeSize := restore.EstimateRangeSize(files) summary.CollectInt("restore ranges", rangeSize) + log.Info("range and file prepared", zap.Int("file count", len(files)), zap.Int("range count", rangeSize)) // Redirect to log if there is no log file to avoid unreadable output. updateCh := g.StartProgress( @@ -207,6 +210,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Split/Scatter + Download/Ingest int64(restore.EstimateRangeSize(files)+len(files)), !cfg.LogProgress) + defer updateCh.Close() clusterCfg, err := restorePreWork(ctx, client, mgr) if err != nil { From 84e267cbf7d90f4d387e2aed0353f186e02f68f7 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sat, 9 May 2020 16:12:43 +0800 Subject: [PATCH 17/52] backup: always wait worker to finish. 
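Closing the batcher now joins the background worker before the output channel
is closed, so a timer-driven flush can no longer race with Close. The
standalone sketch below shows the same join-channel idea; worker, jobs, join,
and out are illustrative names for the sketch, not the fields added in the diff.

    package main

    import (
        "fmt"
        "time"
    )

    // worker drains jobs until it is asked to stop via the join channel.
    // The send on join only completes when the worker is idle in its select,
    // so after that point no further output will be produced.
    func worker(jobs <-chan int, join <-chan struct{}, out chan<- int) {
        for {
            select {
            case <-join: // graceful stop requested; nothing in flight
                return
            case j := <-jobs:
                out <- j * 2
            }
        }
    }

    func main() {
        jobs := make(chan int, 4)
        out := make(chan int, 4)
        join := make(chan struct{})
        go worker(jobs, join, out)

        jobs <- 21
        time.Sleep(100 * time.Millisecond) // stands in for the final flush before stopping
        join <- struct{}{}                 // blocks until the worker is ready to stop
        close(out)                         // safe: the worker has returned

        for v := range out {
            fmt.Println(v)
        }
    }

In the patch itself, Batcher.Close first drains the remaining ranges, then
joinWorker blocks on the joiner channel until workLoop acknowledges the stop,
and only then is outCh closed.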
--- pkg/restore/pipeline_items.go | 37 +++++++++++++++++++++++------------ pkg/task/restore.go | 1 - 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 8c3729cab..19a9013a2 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -46,7 +46,9 @@ type Batcher struct { cachedTablesMu *sync.Mutex rewriteRules *RewriteRules - ctx context.Context + ctx context.Context + // joiner is for joining the background batch sender. + joiner chan<- struct{} sendErr chan<- error outCh chan<- CreatedTable sender BatchSender @@ -107,6 +109,12 @@ func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (Ba } func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { + var tableNames []string + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Debug("split region by tables start", zap.Strings("tables", tableNames)) + if err := SplitRanges(b.ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", zap.Any("ranges", ranges), @@ -114,18 +122,13 @@ func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRul ) return err } + log.Debug("split region by tables end", zap.Strings("tables", tableNames)) files := []*backup.File{} for _, fs := range ranges { files = append(files, fs.Files...) } - var tableNames []string - for _, t := range tbs { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Debug("split range by tables done", zap.Strings("tables", tableNames)) - if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { return err } @@ -138,9 +141,7 @@ func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRul } func (b *tikvSender) Close() { - // Instead of close update channel here, we close it when main function ends execute. - // Because when Close called, there may be some pending work at import worker. - // If possiable, it would be better to move close operation to sender side(e.g. func RestoreFiles). + b.updateCh.Close() } // NewBatcher creates a new batcher by client and updateCh. @@ -152,24 +153,35 @@ func NewBatcher( errCh chan<- error, ) (*Batcher, <-chan CreatedTable) { output := make(chan CreatedTable, defaultBatcherOutputChannelSize) + joiner := make(chan struct{}) b := &Batcher{ rewriteRules: EmptyRewriteRule(), sendErr: errCh, outCh: output, sender: sender, ctx: ctx, + joiner: joiner, cachedTablesMu: new(sync.Mutex), BatchSizeThreshold: 1, } - go b.workLoop() + go b.workLoop(joiner) return b, output } -func (b *Batcher) workLoop() { +// joinWorker blocks the current goroutine until the worker can gracefully stop. 
+func (b *Batcher) joinWorker() { + log.Info("gracefully stoping worker goroutine") + b.joiner <- struct{}{} +} + +func (b *Batcher) workLoop(joiner <-chan struct{}) { tick := time.NewTicker(time.Second) defer tick.Stop() for { select { + case <-joiner: + log.Info("worker goroutine gracefully stoped") + return case <-b.ctx.Done(): b.sendErr <- b.ctx.Err() return @@ -291,6 +303,7 @@ func (b *Batcher) Close() { for b.Len() > 0 { b.asyncSend() } + b.joinWorker() close(b.outCh) b.sender.Close() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 62b2387c0..a07566a90 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -210,7 +210,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Split/Scatter + Download/Ingest int64(restore.EstimateRangeSize(files)+len(files)), !cfg.LogProgress) - defer updateCh.Close() clusterCfg, err := restorePreWork(ctx, client, mgr) if err != nil { From 7fab3c32be0dac99ec8ff8ad99d662e7a1bedfa0 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sat, 9 May 2020 17:12:50 +0800 Subject: [PATCH 18/52] backup, task: skip checksum when needed. --- pkg/restore/client.go | 8 ++++++ pkg/restore/pipeline_items.go | 3 +- pkg/task/restore.go | 53 +++++++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 98a1f48b3..c658c5910 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -772,6 +772,14 @@ func (rc *Client) GoValidateChecksum( } func (rc *Client) execChecksum(ctx context.Context, tbl CreatedTable, kvClient kv.Client) error { + if tbl.OldTable.NoChecksum() { + log.Warn("table has no checksum, skipping checksum.", + zap.Stringer("table", tbl.OldTable.Info.Name), + zap.Stringer("database", tbl.OldTable.Db.Name), + ) + return nil + } + startTS, err := rc.GetTS(ctx) if err != nil { return errors.Trace(err) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 19a9013a2..da850e890 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -172,6 +172,7 @@ func NewBatcher( func (b *Batcher) joinWorker() { log.Info("gracefully stoping worker goroutine") b.joiner <- struct{}{} + log.Info("gracefully stopped worker goroutine") } func (b *Batcher) workLoop(joiner <-chan struct{}) { @@ -180,7 +181,7 @@ func (b *Batcher) workLoop(joiner <-chan struct{}) { for { select { case <-joiner: - log.Info("worker goroutine gracefully stoped") + log.Debug("graceful stop signal received") return case <-b.ctx.Done(): b.sendErr <- b.ctx.Err() diff --git a/pkg/task/restore.go b/pkg/task/restore.go index a07566a90..444355816 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -246,19 +246,23 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf batcher.BatchSizeThreshold = batchSize goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) + var finish <-chan struct{} // Checksum - // TODO: skip checksum when user specificated. - // TODO: allow checksum progress bar can appear together with each other. - // For now, we have to redirect one of them. 
- updateCh = g.StartProgress( - ctx, "Checksum", int64(len(tables)), true) - defer updateCh.Close() - out := client.GoValidateChecksum( - ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) + if cfg.Checksum { + updateCh = g.StartProgress( + ctx, "Checksum", int64(len(tables)), true) + defer updateCh.Close() + finish = client.GoValidateChecksum( + ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) + } else { + // when user skip checksum, just collect tables, and drop them. + finish = dropToBlockhole(ctx, afterRestoreStream, errCh) + } + select { case err = <-errCh: err = multierr.Append(err, multierr.Combine(restore.Exhasut(errCh)...)) - case <-out: + case <-finish: log.Info("all works end.") } @@ -272,6 +276,37 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } +// dropToBlockhole drop all incoming tables into black hole. +func dropToBlockhole( + ctx context.Context, + tableStream <-chan restore.CreatedTable, + errCh chan<- error, +) <-chan struct{} { + outCh := make(chan struct{}, 1) + go func() { + defer func() { + outCh <- struct{}{} + }() + for { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + return + case tbl, ok := <-tableStream: + if !ok { + log.Info("all works end.") + return + } + log.Info("skipping checksum of table because user config", + zap.Stringer("database", tbl.OldTable.Db.Name), + zap.Stringer("table", tbl.Table.Name), + ) + } + } + }() + return outCh +} + func filterRestoreFiles( client *restore.Client, cfg *RestoreConfig, From ac6f5be229b7646754771078ceddc7455236f7c0 Mon Sep 17 00:00:00 2001 From: Hillium Date: Sat, 9 May 2020 17:35:44 +0800 Subject: [PATCH 19/52] *: make linter happy. --- pkg/restore/client.go | 8 ++++---- pkg/restore/pipeline_items.go | 14 +++++++------- pkg/task/restore.go | 25 +------------------------ 3 files changed, 12 insertions(+), 35 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 25959b0be..69cbae14c 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -419,7 +419,7 @@ func (rc *Client) GoCreateTables( } // RemoveTiFlashReplica removes all the tiflash replicas of a table -// TODO: remove this after tiflash supports restore +// TODO: remove this after tiflash supports restore. func (rc *Client) RemoveTiFlashReplica( tables []*utils.Table, newTables []*model.TableInfo, placementRules []placement.Rule) error { schemas := make([]*backup.Schema, 0, len(tables)) @@ -491,8 +491,8 @@ func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule return 0, nil } -// RecoverTiFlashOfTable recoveres TiFlash replica of some table. -// TODO: remove this after tiflash supports restore +// RecoverTiFlashOfTable recovers TiFlash replica of some table. +// TODO: remove this after tiflash supports restore. func (rc *Client) RecoverTiFlashOfTable(table *utils.Table) error { if table.TiFlashReplicas > 0 { err := rc.db.AlterTiflashReplica(rc.ctx, table, table.TiFlashReplicas) @@ -504,7 +504,7 @@ func (rc *Client) RecoverTiFlashOfTable(table *utils.Table) error { } // RecoverTiFlashReplica recovers all the tiflash replicas of a table -// TODO: remove this after tiflash supports restore +// TODO: remove this after tiflash supports restore. 
func (rc *Client) RecoverTiFlashReplica(tables []*utils.Table) error { for _, table := range tables { if err := rc.RecoverTiFlashOfTable(table); err != nil { diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index da850e890..268d55a1a 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -40,7 +40,7 @@ type TableWithRange struct { Range []rtree.Range } -// Batcher collectes ranges to restore and send batching split/ingest request. +// Batcher collects ranges to restore and send batching split/ingest request. type Batcher struct { cachedTables []TableWithRange cachedTablesMu *sync.Mutex @@ -56,8 +56,8 @@ type Batcher struct { size int32 } -// Exhasut drains all remaining errors in the channel, into a slice of errors. -func Exhasut(ec <-chan error) []error { +// Exhaust drains all remaining errors in the channel, into a slice of errors. +func Exhaust(ec <-chan error) []error { out := make([]error, 0, len(ec)) select { case err := <-ec: @@ -109,7 +109,7 @@ func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (Ba } func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { - var tableNames []string + tableNames := make([]string, 0, len(tbs)) for _, t := range tbs { tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) } @@ -232,7 +232,7 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl ranges = append(ranges, drained...) b.cachedTables = b.cachedTables[offset:] atomic.AddInt32(&b.size, -int32(len(ranges))) - return + return ranges, emptyTables } emptyTables = append(emptyTables, thisTable.CreatedTable) @@ -250,14 +250,14 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // all tables are drained. b.cachedTables = []TableWithRange{} atomic.AddInt32(&b.size, -int32(len(ranges))) - return + return ranges, emptyTables } // Send sends all pending requests in the batcher. // returns tables sent in the current batch. func (b *Batcher) Send() ([]CreatedTable, error) { ranges, tbs := b.drainRanges() - var tableNames []string + tableNames := make([]string, 0, len(tbs)) for _, t := range tbs { tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 575bcf6ea..029ed296e 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -20,7 +20,6 @@ import ( "github.com/pingcap/br/pkg/conn" "github.com/pingcap/br/pkg/glue" "github.com/pingcap/br/pkg/restore" - "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/br/pkg/storage" "github.com/pingcap/br/pkg/summary" "github.com/pingcap/br/pkg/utils" @@ -261,7 +260,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf select { case err = <-errCh: - err = multierr.Append(err, multierr.Combine(restore.Exhasut(errCh)...)) + err = multierr.Append(err, multierr.Combine(restore.Exhaust(errCh)...)) case <-finish: log.Info("all works end.") } @@ -579,28 +578,6 @@ func enableTiDBConfig() { config.StoreGlobalConfig(conf) } -// collectRestoreResults collectes result of pipelined restore process, -// block the current goroutine, until all the tasks finished. -// TODO: remove this function when all the link is pipelined. 
-func collectRestoreResults( - ctx context.Context, - ch <-chan restore.TableWithRange, - errCh <-chan error, -) (newTables []*model.TableInfo, ranges []rtree.Range, rewriteRules *restore.RewriteRules, err error) { - rewriteRules = restore.EmptyRewriteRule() - for ct := range ch { - newTables = append(newTables, ct.Table) - ranges = append(ranges, ct.Range...) - rewriteRules.Table = append(rewriteRules.Table, ct.RewriteRule.Table...) - rewriteRules.Data = append(rewriteRules.Data, ct.RewriteRule.Data...) - } - select { - case err = <-errCh: - default: - } - return -} - // goRestore forks a goroutine to do the restore process. func goRestore( ctx context.Context, From 25a9a3acfbcc3930f3025858a9e7c77a614d3d3d Mon Sep 17 00:00:00 2001 From: Hillium Date: Mon, 11 May 2020 16:48:34 +0800 Subject: [PATCH 20/52] restore: move batcher test to restore_test package. --- pkg/restore/batcher_test.go | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 14953246a..6c17efd19 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -1,11 +1,13 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package restore_test import ( "context" "time" + "github.com/pingcap/br/pkg/restore" + . "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/pingcap/log" @@ -19,12 +21,16 @@ import ( type testBatcherSuite struct{} type drySender struct { - tbls chan CreatedTable + tbls chan restore.CreatedTable ranges chan rtree.Range nBatch int } -func (d *drySender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { +func (d *drySender) RestoreBatch( + ranges []rtree.Range, + rewriteRules *restore.RewriteRules, + tbs []restore.CreatedTable, +) error { d.nBatch++ for _, tbl := range tbs { log.Info("dry restore", zap.Int64("table ID", tbl.Table.ID)) @@ -41,7 +47,7 @@ func (d *drySender) Close() { close(d.ranges) } -func (d *drySender) exhaust() (tbls []CreatedTable, rngs []rtree.Range) { +func (d *drySender) exhaust() (tbls []restore.CreatedTable, rngs []rtree.Range) { for tbl := range d.tbls { tbls = append(tbls, tbl) } @@ -53,7 +59,7 @@ func (d *drySender) exhaust() (tbls []CreatedTable, rngs []rtree.Range) { func newDrySender() *drySender { return &drySender{ - tbls: make(chan CreatedTable, 4096), + tbls: make(chan restore.CreatedTable, 4096), ranges: make(chan rtree.Range, 4096), } } @@ -74,16 +80,16 @@ var ( _ = Suite(&testBatcherSuite{}) ) -func fakeTableWithRange(id int64, rngs []rtree.Range) TableWithRange { +func fakeTableWithRange(id int64, rngs []rtree.Range) restore.TableWithRange { tbl := &utils.Table{ Db: &model.DBInfo{}, Info: &model.TableInfo{ ID: id, }, } - tblWithRng := TableWithRange{ - CreatedTable: CreatedTable{ - RewriteRule: EmptyRewriteRule(), + tblWithRng := restore.TableWithRange{ + CreatedTable: restore.CreatedTable{ + RewriteRule: restore.EmptyRewriteRule(), Table: tbl.Info, OldTable: tbl, }, @@ -103,10 +109,10 @@ func fakeRange(startKey, endKey string) rtree.Range { func (*testBatcherSuite) TestBasic(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) batcher.BatchSizeThreshold = 2 - simpleTables := []TableWithRange{ + simpleTables := []restore.TableWithRange{ fakeTableWithRange(1, []rtree.Range{fakeRange("aaa", "aab")}), fakeTableWithRange(2, 
[]rtree.Range{fakeRange("baa", "bab"), fakeRange("bac", "bad")}), fakeTableWithRange(3, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}), @@ -137,7 +143,7 @@ func (*testBatcherSuite) TestBasic(c *C) { func (*testBatcherSuite) TestAutoSend(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) batcher.BatchSizeThreshold = 1024 simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) @@ -165,7 +171,7 @@ func (*testBatcherSuite) TestAutoSend(c *C) { func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) batcher.BatchSizeThreshold = 2 simpleTable := fakeTableWithRange(1, []rtree.Range{ @@ -193,7 +199,7 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { func (*testBatcherSuite) TestBatcherLen(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := NewBatcher(context.Background(), sender, errCh) + batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) batcher.BatchSizeThreshold = 1024 simpleTable := fakeTableWithRange(1, []rtree.Range{ From 7f8251e8bf8eebd592bab6e470d1413247a66bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 12 May 2020 10:40:32 +0800 Subject: [PATCH 21/52] Apply suggestions from code review Co-authored-by: kennytm --- pkg/restore/pipeline_items.go | 6 +++--- pkg/restore/util.go | 8 +++----- pkg/task/restore.go | 5 ++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 268d55a1a..c74aed8e1 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -25,8 +25,8 @@ const ( defaultBatcherOutputChannelSize = 1024 ) -// CreatedTable is a table is created on restore process, -// but not yet filled by data. +// CreatedTable is a table created on restore process, +// but not yet filled with data. type CreatedTable struct { RewriteRule *RewriteRules Table *model.TableInfo @@ -279,7 +279,7 @@ func (b *Batcher) sendIfFull() { } } -// Add addes a task to batcher. +// Add adds a task to the Batcher. func (b *Batcher) Add(tbs TableWithRange) { b.cachedTablesMu.Lock() log.Debug("adding table to batch", diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 6eb13cab2..06a0527c0 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -124,11 +124,9 @@ func getSSTMetaFromFile( // EstimateRangeSize estimates the total range count by file. 
func EstimateRangeSize(files []*backup.File) int { result := 0 - visitedSet := map[string]bool{} for _, f := range files { - if !visitedSet[f.GetName()] && strings.Contains(f.GetName(), "write") { + if strings.HasSuffix(f.GetName(), "_write.sst") { result++ - visitedSet[f.GetName()] = true } } return result @@ -242,11 +240,11 @@ func validateAndGetFileRange(file *backup.File, rules *RewriteRules) (rtree.Rang startID := tablecodec.DecodeTableID(file.GetStartKey()) endID := tablecodec.DecodeTableID(file.GetEndKey()) if startID != endID { - log.Error("table ids dont match", + log.Error("table ids mismatch", zap.Int64("startID", startID), zap.Int64("endID", endID), zap.Stringer("file", file)) - return rtree.Range{}, errors.New("table ids dont match") + return rtree.Range{}, errors.New("table ids mismatch") } r := rtree.Range{StartKey: file.GetStartKey(), EndKey: file.GetEndKey()} return r, nil diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 029ed296e..cab36cfdc 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -194,7 +194,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } tableFileMap := restore.MapTableToFiles(files) - log.Debug("mapped table to files.", zap.Any("result map", tableFileMap)) + log.Debug("mapped table to files", zap.Any("result map", tableFileMap)) rangeStream := restore.GoValidateFileRanges(ctx, tableStream, tableFileMap, errCh) @@ -276,7 +276,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } // dropToBlockhole drop all incoming tables into black hole. -func dropToBlockhole( +func dropToBlackhole( ctx context.Context, tableStream <-chan restore.CreatedTable, errCh chan<- error, @@ -293,7 +293,6 @@ func dropToBlockhole( return case tbl, ok := <-tableStream: if !ok { - log.Info("all works end.") return } log.Info("skipping checksum of table because user config", From 4bf507e02c2e6ccaa0435429d8f7fc29ae1ef00a Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 12 May 2020 12:12:21 +0800 Subject: [PATCH 22/52] restore, task: remove context on struct types. --- pkg/restore/batcher_test.go | 20 +++++++------- pkg/restore/pipeline_items.go | 50 +++++++++++++++++++---------------- pkg/task/restore.go | 4 +-- tests/config/tidb.toml | 2 +- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 6c17efd19..84079c57d 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -27,8 +27,8 @@ type drySender struct { } func (d *drySender) RestoreBatch( + _ctx context.Context, ranges []rtree.Range, - rewriteRules *restore.RewriteRules, tbs []restore.CreatedTable, ) error { d.nBatch++ @@ -119,10 +119,10 @@ func (*testBatcherSuite) TestBasic(c *C) { } for _, tbl := range simpleTables { - batcher.Add(tbl) + batcher.Add(context.TODO(), tbl) } - batcher.Close() + batcher.Close(context.TODO()) tbls, rngs := sender.exhaust() totalRngs := []rtree.Range{} @@ -143,19 +143,19 @@ func (*testBatcherSuite) TestBasic(c *C) { func (*testBatcherSuite) TestAutoSend(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) + batcher, _ := restore.NewBatcher(context.TODO(), sender, errCh) batcher.BatchSizeThreshold = 1024 simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) - batcher.Add(simpleTable) + batcher.Add(context.TODO(), simpleTable) // wait until auto send. 
time.Sleep(1300 * time.Millisecond) c.Assert(sender.RangeLen(), Greater, 0) c.Assert(sender.TableLen(), Greater, 0) c.Assert(batcher.Len(), Equals, 0) - batcher.Close() + batcher.Close(context.TODO()) tbls, rngs := sender.exhaust() c.Assert(len(tbls), Greater, 0) @@ -180,10 +180,10 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - batcher.Add(simpleTable) + batcher.Add(context.TODO(), simpleTable) c.Assert(sender.BatchCount(), Equals, 4) - batcher.Close() + batcher.Close(context.TODO()) tbls, rngs := sender.exhaust() c.Assert(len(tbls), Greater, 0) @@ -208,9 +208,9 @@ func (*testBatcherSuite) TestBatcherLen(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - batcher.Add(simpleTable) + batcher.Add(context.TODO(), simpleTable) c.Assert(batcher.Len(), Equals, 8) - batcher.Close() + batcher.Close(context.TODO()) c.Assert(batcher.Len(), Equals, 0) select { diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index c74aed8e1..2d16dbef4 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -46,7 +46,6 @@ type Batcher struct { cachedTablesMu *sync.Mutex rewriteRules *RewriteRules - ctx context.Context // joiner is for joining the background batch sender. joiner chan<- struct{} sendErr chan<- error @@ -59,12 +58,16 @@ type Batcher struct { // Exhaust drains all remaining errors in the channel, into a slice of errors. func Exhaust(ec <-chan error) []error { out := make([]error, 0, len(ec)) - select { - case err := <-ec: - out = append(out, err) - default: + for { + select { + case err := <-ec: + out = append(out, err) + default: + // errCh will *never* closed(ya see, it has multi send-part), + // so we just consume the current backlog of this cannel, then return. + return out + } } - return out } // Len calculate the current size of this batcher. @@ -75,7 +78,7 @@ func (b *Batcher) Len() int { // BatchSender is the abstract of how the batcher send a batch. 
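// An implementation such as tikvSender first splits the regions covered by the
// drained ranges and then restores the collected files, reporting progress through updateCh.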
type BatchSender interface { // RestoreBatch will backup all ranges and tables - RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error + RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs []CreatedTable) error Close() } @@ -108,9 +111,11 @@ func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (Ba }, nil } -func (b *tikvSender) RestoreBatch(ranges []rtree.Range, rewriteRules *RewriteRules, tbs []CreatedTable) error { +func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs []CreatedTable) error { + rewriteRules := EmptyRewriteRule() tableNames := make([]string, 0, len(tbs)) for _, t := range tbs { + rewriteRules.Append(*t.RewriteRule) tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) } log.Debug("split region by tables start", zap.Strings("tables", tableNames)) @@ -159,12 +164,11 @@ func NewBatcher( sendErr: errCh, outCh: output, sender: sender, - ctx: ctx, joiner: joiner, cachedTablesMu: new(sync.Mutex), BatchSizeThreshold: 1, } - go b.workLoop(joiner) + go b.workLoop(ctx, joiner) return b, output } @@ -175,7 +179,7 @@ func (b *Batcher) joinWorker() { log.Info("gracefully stopped worker goroutine") } -func (b *Batcher) workLoop(joiner <-chan struct{}) { +func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}) { tick := time.NewTicker(time.Second) defer tick.Stop() for { @@ -183,20 +187,20 @@ func (b *Batcher) workLoop(joiner <-chan struct{}) { case <-joiner: log.Debug("graceful stop signal received") return - case <-b.ctx.Done(): - b.sendErr <- b.ctx.Err() + case <-ctx.Done(): + b.sendErr <- ctx.Err() return case <-tick.C: if b.Len() > 0 { log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) - b.asyncSend() + b.asyncSend(ctx) } } } } -func (b *Batcher) asyncSend() { - tbls, err := b.Send() +func (b *Batcher) asyncSend(ctx context.Context) { + tbls, err := b.Send(ctx) if err != nil { b.sendErr <- err return @@ -255,7 +259,7 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // Send sends all pending requests in the batcher. // returns tables sent in the current batch. -func (b *Batcher) Send() ([]CreatedTable, error) { +func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { ranges, tbs := b.drainRanges() tableNames := make([]string, 0, len(tbs)) for _, t := range tbs { @@ -265,17 +269,17 @@ func (b *Batcher) Send() ([]CreatedTable, error) { zap.Strings("tables", tableNames), zap.Int("ranges", len(ranges)), ) - if err := b.sender.RestoreBatch(ranges, b.rewriteRules, tbs); err != nil { + if err := b.sender.RestoreBatch(ctx, ranges, tbs); err != nil { return nil, err } return tbs, nil } -func (b *Batcher) sendIfFull() { +func (b *Batcher) sendIfFull(ctx context.Context) { // never collect the send batch request message. for b.Len() >= b.BatchSizeThreshold { log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend() + b.asyncSend(ctx) } } @@ -295,14 +299,14 @@ func (b *Batcher) Add(tbs TableWithRange) { atomic.AddInt32(&b.size, int32(len(tbs.Range))) b.cachedTablesMu.Unlock() - b.sendIfFull() + b.sendIfFull(ctx) } // Close closes the batcher, sending all pending requests, close updateCh. 
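// It drains every cached range, waits for the background worker goroutine to stop,
// and only then closes the output channel and the underlying sender.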
-func (b *Batcher) Close() { +func (b *Batcher) Close(ctx context.Context) { log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) for b.Len() > 0 { - b.asyncSend() + b.asyncSend(ctx) } b.joinWorker() close(b.outCh) diff --git a/pkg/task/restore.go b/pkg/task/restore.go index cab36cfdc..01ca79a29 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -592,7 +592,7 @@ func goRestore( newTables := []*model.TableInfo{} defer func() { // when things done, we must clean pending requests. - batcher.Close() + batcher.Close(ctx) log.Info("doing postwork", zap.Int("new tables", len(newTables)), zap.Int("old tables", len(oldTables)), @@ -630,7 +630,7 @@ func goRestore( } newTables = append(newTables, t.Table) - batcher.Add(t) + batcher.Add(ctx, t) } } }() diff --git a/tests/config/tidb.toml b/tests/config/tidb.toml index b4a2289f1..301aabd94 100644 --- a/tests/config/tidb.toml +++ b/tests/config/tidb.toml @@ -3,4 +3,4 @@ # Schema lease duration # There are lot of ddl in the tests, setting this # to 360s to test whther BR is gracefully shutdown. -lease = "360s" \ No newline at end of file +lease = "360s" From c45c772fda528945102d24230b6c4dba70837e2d Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 12 May 2020 12:48:33 +0800 Subject: [PATCH 23/52] restore: batcher auto commit can be disabled now. --- pkg/restore/batcher_test.go | 32 ++++++++++++------ pkg/restore/pipeline_items.go | 62 +++++++++++++++++++++++++---------- pkg/task/restore.go | 6 ++-- 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 84079c57d..ff979cbef 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -109,8 +109,8 @@ func fakeRange(startKey, endKey string) rtree.Range { func (*testBatcherSuite) TestBasic(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) - batcher.BatchSizeThreshold = 2 + batcher, _ := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(2) simpleTables := []restore.TableWithRange{ fakeTableWithRange(1, []rtree.Range{fakeRange("aaa", "aab")}), @@ -143,14 +143,18 @@ func (*testBatcherSuite) TestBasic(c *C) { func (*testBatcherSuite) TestAutoSend(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(context.TODO(), sender, errCh) - batcher.BatchSizeThreshold = 1024 + batcher, _ := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(1024) simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) batcher.Add(context.TODO(), simpleTable) - // wait until auto send. - time.Sleep(1300 * time.Millisecond) + c.Assert(batcher.Len(), Greater, 0) + + // enable auto commit. 
+ batcher.EnableAutoCommit(context.TODO(), 100*time.Millisecond) + time.Sleep(120 * time.Millisecond) + c.Assert(sender.RangeLen(), Greater, 0) c.Assert(sender.TableLen(), Greater, 0) c.Assert(batcher.Len(), Equals, 0) @@ -171,8 +175,8 @@ func (*testBatcherSuite) TestAutoSend(c *C) { func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) - batcher.BatchSizeThreshold = 2 + batcher, _ := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(2) simpleTable := fakeTableWithRange(1, []rtree.Range{ fakeRange("caa", "cab"), fakeRange("cac", "cad"), @@ -199,8 +203,8 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { func (*testBatcherSuite) TestBatcherLen(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(context.Background(), sender, errCh) - batcher.BatchSizeThreshold = 1024 + batcher, _ := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(15) simpleTable := fakeTableWithRange(1, []rtree.Range{ fakeRange("caa", "cab"), fakeRange("cac", "cad"), @@ -208,8 +212,16 @@ func (*testBatcherSuite) TestBatcherLen(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) + simpleTable2 := fakeTableWithRange(2, []rtree.Range{ + fakeRange("caa", "cab"), fakeRange("cac", "cad"), + fakeRange("cae", "caf"), fakeRange("cag", "cai"), + fakeRange("caj", "cak"), fakeRange("cal", "cam"), + fakeRange("can", "cao"), fakeRange("cap", "caq")}) + batcher.Add(context.TODO(), simpleTable) c.Assert(batcher.Len(), Equals, 8) + batcher.Add(context.TODO(), simpleTable2) + c.Assert(batcher.Len(), Equals, 1) batcher.Close(context.TODO()) c.Assert(batcher.Len(), Equals, 0) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 2d16dbef4..cd05f12c0 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -51,10 +51,17 @@ type Batcher struct { sendErr chan<- error outCh chan<- CreatedTable sender BatchSender - BatchSizeThreshold int + batchSizeThreshold int size int32 } +// SetThreshold sets the threshold that how big the batch size reaching need to send batch. +// note this function isn't goroutine safe yet, +// just set threshold before anything starts(e.g. EnableAutoCommit), please. +func (b *Batcher) SetThreshold(newThreshold int) { + b.batchSizeThreshold = newThreshold +} + // Exhaust drains all remaining errors in the channel, into a slice of errors. func Exhaust(ec <-chan error) []error { out := make([]error, 0, len(ec)) @@ -63,7 +70,7 @@ func Exhaust(ec <-chan error) []error { case err := <-ec: out = append(out, err) default: - // errCh will *never* closed(ya see, it has multi send-part), + // errCh will *never* closed(ya see, it has multi sender-part), // so we just consume the current backlog of this cannel, then return. return out } @@ -153,34 +160,52 @@ func (b *tikvSender) Close() { // this batcher will work background, send batches per second, or batch size reaches limit. // and it will emit full-restored tables to the output channel returned. 
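// Note that after this patch the periodic background send is started by
// EnableAutoCommit rather than by NewBatcher itself.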
func NewBatcher( - ctx context.Context, sender BatchSender, errCh chan<- error, ) (*Batcher, <-chan CreatedTable) { output := make(chan CreatedTable, defaultBatcherOutputChannelSize) - joiner := make(chan struct{}) b := &Batcher{ rewriteRules: EmptyRewriteRule(), sendErr: errCh, outCh: output, sender: sender, - joiner: joiner, cachedTablesMu: new(sync.Mutex), - BatchSizeThreshold: 1, + batchSizeThreshold: 1, } - go b.workLoop(ctx, joiner) return b, output } +// EnableAutoCommit enables the batcher commit batch periodicity even batcher size isn't big enough. +// we make this function for disable AutoCommit in some case. +func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { + if b.joiner != nil { + log.Warn("enable auto commit on a batcher that is enabled auto commit, nothing will happen") + log.Info("if desire, please disable auto commit firstly") + } + joiner := make(chan struct{}) + go b.workLoop(ctx, joiner, delay) + b.joiner = joiner +} + +// DisableAutoCommit blocks the current goroutine until the worker can gracefully stop, +// and then disable auto commit. +func (b *Batcher) DisableAutoCommit(ctx context.Context) { + b.joinWorker() + b.joiner = nil +} + // joinWorker blocks the current goroutine until the worker can gracefully stop. +// return immediately when auto commit disabled. func (b *Batcher) joinWorker() { - log.Info("gracefully stoping worker goroutine") - b.joiner <- struct{}{} - log.Info("gracefully stopped worker goroutine") + if b.joiner != nil { + log.Info("gracefully stoping worker goroutine") + b.joiner <- struct{}{} + log.Info("gracefully stopped worker goroutine") + } } -func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}) { - tick := time.NewTicker(time.Second) +func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { + tick := time.NewTicker(delay) defer tick.Stop() for { select { @@ -221,8 +246,8 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // the batch is full, we should stop here! // we use strictly greater than because when we send a batch at equal, the offset should plus one. // (because the last table is sent, we should put it in emptyTables), and this will intrduce extra complex. - if thisTableLen+collected > b.BatchSizeThreshold { - drainSize := b.BatchSizeThreshold - collected + if thisTableLen+collected > b.batchSizeThreshold { + drainSize := b.batchSizeThreshold - collected thisTableRanges := thisTable.Range var drained []rtree.Range @@ -235,13 +260,15 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl ) ranges = append(ranges, drained...) b.cachedTables = b.cachedTables[offset:] - atomic.AddInt32(&b.size, -int32(len(ranges))) + atomic.AddInt32(&b.size, -int32(len(drained))) return ranges, emptyTables } emptyTables = append(emptyTables, thisTable.CreatedTable) // let's 'drain' the ranges of current table. This op must not make the batch full. ranges = append(ranges, thisTable.Range...) + // let's reduce the batcher size each time, to make a consitance view of + atomic.AddInt32(&b.size, -int32(len(thisTable.Range))) // clear the table length. b.cachedTables[offset].Range = []rtree.Range{} log.Debug("draining table to batch", @@ -253,7 +280,6 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // all tables are drained. 
b.cachedTables = []TableWithRange{} - atomic.AddInt32(&b.size, -int32(len(ranges))) return ranges, emptyTables } @@ -277,7 +303,7 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { func (b *Batcher) sendIfFull(ctx context.Context) { // never collect the send batch request message. - for b.Len() >= b.BatchSizeThreshold { + for b.Len() >= b.batchSizeThreshold { log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) b.asyncSend(ctx) } @@ -308,7 +334,7 @@ func (b *Batcher) Close(ctx context.Context) { for b.Len() > 0 { b.asyncSend(ctx) } - b.joinWorker() + b.DisableAutoCommit(ctx) close(b.outCh) b.sender.Close() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 01ca79a29..812894a07 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -5,6 +5,7 @@ package task import ( "context" "math" + "time" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" @@ -241,8 +242,9 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err != nil { return err } - batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, errCh) - batcher.BatchSizeThreshold = batchSize + batcher, afterRestoreStream := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(batchSize) + batcher.EnableAutoCommit(ctx, time.Second) goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) var finish <-chan struct{} From afea1255599fb2b7ae12670836076221bee9a52b Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 12 May 2020 13:15:33 +0800 Subject: [PATCH 24/52] restore, task: fix typos. --- pkg/restore/client.go | 2 +- pkg/restore/pipeline_items.go | 6 ++---- pkg/restore/util_test.go | 2 +- pkg/task/restore.go | 3 +-- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 69cbae14c..bca1fe67f 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -773,7 +773,7 @@ func (rc *Client) GoValidateChecksum( func (rc *Client) execChecksum(ctx context.Context, tbl CreatedTable, kvClient kv.Client) error { if tbl.OldTable.NoChecksum() { - log.Warn("table has no checksum, skipping checksum.", + log.Warn("table has no checksum, skipping checksum", zap.Stringer("table", tbl.OldTable.Info.Name), zap.Stringer("database", tbl.OldTable.Db.Name), ) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index cd05f12c0..18cd9ccd3 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -92,7 +92,6 @@ type BatchSender interface { type tikvSender struct { client *Client updateCh glue.Progress - ctx context.Context rejectStoreMap map[uint64]bool } @@ -113,7 +112,6 @@ func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (Ba return &tikvSender{ client: cli, updateCh: updateCh, - ctx: ctx, rejectStoreMap: rejectStoreMap, }, nil } @@ -127,7 +125,7 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs } log.Debug("split region by tables start", zap.Strings("tables", tableNames)) - if err := SplitRanges(b.ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { + if err := SplitRanges(ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", zap.Any("ranges", ranges), zap.Error(err), @@ -310,7 +308,7 @@ func (b *Batcher) sendIfFull(ctx context.Context) { } // Add adds a task to the Batcher. 
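// The new ranges are counted into the batcher's size, and sendIfFull flushes the
// cache as soon as it reaches the batch size threshold.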
-func (b *Batcher) Add(tbs TableWithRange) { +func (b *Batcher) Add(ctx context.Context, tbs TableWithRange) { b.cachedTablesMu.Lock() log.Debug("adding table to batch", zap.Stringer("table", tbs.Table.Name), diff --git a/pkg/restore/util_test.go b/pkg/restore/util_test.go index 1d6f782e7..ebc86d3b9 100644 --- a/pkg/restore/util_test.go +++ b/pkg/restore/util_test.go @@ -118,7 +118,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { }}, rules, ) - c.Assert(err, ErrorMatches, "table ids dont match") + c.Assert(err, ErrorMatches, "table ids mismatch") // Add a bad rule for end key, after rewrite start key > end key. rules.Table = append(rules.Table[:1], &import_sstpb.RewriteRule{ diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 812894a07..4da6dc0c5 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -257,14 +257,13 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) } else { // when user skip checksum, just collect tables, and drop them. - finish = dropToBlockhole(ctx, afterRestoreStream, errCh) + finish = dropToBlackhole(ctx, afterRestoreStream, errCh) } select { case err = <-errCh: err = multierr.Append(err, multierr.Combine(restore.Exhaust(errCh)...)) case <-finish: - log.Info("all works end.") } // If any error happened, return now, don't execute checksum. From 3c7530cf68728bff10e58aafff7115c09d4f4095 Mon Sep 17 00:00:00 2001 From: Hillium Date: Tue, 12 May 2020 16:50:29 +0800 Subject: [PATCH 25/52] recover: fix a bug about removing tiflash. --- pkg/restore/client.go | 1 + pkg/task/restore.go | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index bca1fe67f..aa6bc3394 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -486,6 +486,7 @@ func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule if err != nil { return 0, errors.Trace(err) } + return rule.Count, nil } } return 0, nil diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 4da6dc0c5..c87e25d71 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -615,14 +615,16 @@ func goRestore( return } // Omit the number of TiFlash have been removed. - if _, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules); err != nil { + tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) + if err != nil { log.Error("failed on remove TiFlash replicas", zap.Error(err)) errCh <- err return } + t.OldTable.TiFlashReplicas = tiFlashRep oldTables = append(oldTables, t.OldTable) - // Reusage of splitPrepareWork would be safe. + // Reuse of splitPrepareWork would be safe. // But this operation sometime would be costly. if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { log.Error("failed on set online restore placement rules", zap.Error(err)) From e164c4f64dbe0adc251e29c945a1e217bd0cb21d Mon Sep 17 00:00:00 2001 From: Hillium Date: Wed, 13 May 2020 11:20:44 +0800 Subject: [PATCH 26/52] restore: MapTableToFiles issues Error log when key range not match. 
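For context on the hunk below: MapTableToFiles buckets backup files by the table ID decoded from each file's start key, and this patch only raises the log level when a file's start and end keys decode to different tables. A simplified sketch of that grouping, using a stand-in file type instead of the real backup.File message:

    package main

    import "fmt"

    // file stands in for the backup.File message; the real code decodes the
    // table IDs from the raw start/end keys with tablecodec.DecodeTableID.
    type file struct {
        Name     string
        StartTbl int64
        EndTbl   int64
    }

    // mapTableToFiles groups files by start-key table ID and reports files
    // that span more than one table or carry an undecodable table key.
    func mapTableToFiles(files []file) map[int64][]file {
        result := map[int64][]file{}
        for _, f := range files {
            if f.StartTbl != f.EndTbl {
                fmt.Printf("error: file %s spreads between tables %d and %d\n", f.Name, f.StartTbl, f.EndTbl)
            }
            if f.StartTbl == 0 {
                fmt.Printf("error: file %s has an invalid table key\n", f.Name)
            }
            result[f.StartTbl] = append(result[f.StartTbl], f)
        }
        return result
    }

    func main() {
        m := mapTableToFiles([]file{
            {Name: "45_write.sst", StartTbl: 45, EndTbl: 45},
            {Name: "bad.sst", StartTbl: 45, EndTbl: 46},
        })
        fmt.Println(len(m[45])) // 2
    }
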
--- pkg/restore/util.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 06a0527c0..82903820e 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -162,13 +162,13 @@ func MapTableToFiles(files []*backup.File) map[int64][]*backup.File { tableID := tablecodec.DecodeTableID(file.GetStartKey()) tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) if tableID != tableEndID { - log.Warn("key range spread between many files.", + log.Error("key range spread between many files.", zap.String("file name", file.Name), zap.Binary("start key", file.GetStartKey()), zap.Binary("end key", file.GetEndKey())) } if tableID == 0 { - log.Warn("invalid table key of file", + log.Error("invalid table key of file", zap.String("file name", file.Name), zap.Binary("start key", file.GetStartKey()), zap.Binary("end key", file.GetEndKey())) From 2af022a2208890085bcecccba32cb8f526fef3ca Mon Sep 17 00:00:00 2001 From: Hillium Date: Wed, 13 May 2020 11:28:27 +0800 Subject: [PATCH 27/52] *: merge master. --- Makefile | 16 +++- go.mod | 43 ++++------ go.sum | 143 ++++++++++++++++++++++++++++++++++ pkg/backup/client.go | 9 ++- pkg/backup/client_test.go | 28 +++---- pkg/backup/metrics.go | 2 +- pkg/backup/safe_point_test.go | 11 +-- pkg/backup/schema.go | 5 +- pkg/backup/schema_test.go | 17 ++-- pkg/checksum/executor.go | 17 +++- pkg/checksum/executor_test.go | 29 ++++--- pkg/mock/mock_cluster_test.go | 14 +++- pkg/restore/backoff.go | 48 +++++++----- pkg/restore/backoff_test.go | 36 ++++----- pkg/restore/client.go | 3 +- pkg/restore/client_test.go | 13 +--- pkg/restore/db.go | 4 +- pkg/restore/db_test.go | 7 +- pkg/restore/import.go | 34 ++++---- pkg/restore/range.go | 14 +++- pkg/restore/range_test.go | 11 +-- pkg/restore/split.go | 16 ++-- pkg/restore/split_client.go | 1 - pkg/restore/split_test.go | 72 +++++++++-------- pkg/restore/util.go | 9 +-- pkg/restore/util_test.go | 50 ++++++------ pkg/rtree/rtree_test.go | 42 +++++----- pkg/storage/gcs.go | 1 - pkg/storage/gcs_test.go | 4 +- pkg/storage/local.go | 5 +- pkg/storage/parse_test.go | 1 - pkg/storage/s3.go | 8 +- pkg/storage/s3_test.go | 2 +- pkg/storage/storage.go | 2 +- pkg/summary/collector.go | 2 +- pkg/utils/key_test.go | 1 - pkg/utils/progress.go | 2 +- pkg/utils/retry.go | 4 +- pkg/utils/unit.go | 6 +- pkg/utils/worker.go | 4 +- tools/go.mod | 3 +- tools/go.sum | 44 +++++++---- 42 files changed, 496 insertions(+), 287 deletions(-) diff --git a/Makefile b/Makefile index fb421e2d8..7d5f188b1 100644 --- a/Makefile +++ b/Makefile @@ -68,20 +68,30 @@ static: tools tools/bin/goimports -w -d -format-only -local $(BR_PKG) $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER) tools/bin/govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER) + @# why some lints are disabled? + @# gochecknoglobals - disabled because we do use quite a lot of globals + @# goimports - executed above already + @# gofmt - ditto + @# wsl - too pedantic about the formatting + @# funlen - PENDING REFACTORING + @# gocognit - PENDING REFACTORING + @# godox - TODO + @# gomnd - too many magic numbers, and too pedantic (even 2*x got flagged...) 
+ @# testpackage - several test packages still rely on private functions + @# nestif - PENDING REFACTORING + @# goerr113 - it mistaken pingcap/errors with standard errors CGO_ENABLED=0 tools/bin/golangci-lint run --enable-all --deadline 120s \ --disable gochecknoglobals \ - --disable gochecknoinits \ - --disable interfacer \ --disable goimports \ --disable gofmt \ --disable wsl \ --disable funlen \ - --disable whitespace \ --disable gocognit \ --disable godox \ --disable gomnd \ --disable testpackage \ --disable nestif \ + --disable goerr113 \ $$($(PACKAGE_DIRECTORIES)) lint: tools diff --git a/go.mod b/go.mod index cfdfc1939..58d8551a2 100644 --- a/go.mod +++ b/go.mod @@ -3,41 +3,32 @@ module github.com/pingcap/br go 1.13 require ( - cloud.google.com/go/storage v1.4.0 - github.com/aws/aws-sdk-go v1.26.1 - github.com/cheggaaa/pb/v3 v3.0.1 - github.com/coreos/go-semver v0.3.0 // indirect - github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect - github.com/fsouza/fake-gcs-server v1.15.0 - github.com/go-sql-driver/mysql v1.4.1 + cloud.google.com/go/storage v1.5.0 + github.com/aws/aws-sdk-go v1.30.24 + github.com/cheggaaa/pb/v3 v3.0.4 + github.com/fsouza/fake-gcs-server v1.17.0 + github.com/go-sql-driver/mysql v1.5.0 github.com/gogo/protobuf v1.3.1 github.com/google/btree v1.0.0 github.com/google/uuid v1.1.1 - github.com/klauspost/cpuid v1.2.0 // indirect - github.com/montanaflynn/stats v0.5.0 // indirect - github.com/onsi/ginkgo v1.11.0 // indirect - github.com/onsi/gomega v1.8.1 // indirect github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712 github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011 - github.com/pingcap/kvproto v0.0.0-20200423020121-038e31959c2a + github.com/pingcap/kvproto v0.0.0-20200509065137-6a4d5c264a8b github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd - github.com/pingcap/parser v0.0.0-20200424075042-8222d8b724a4 - github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2 - github.com/pingcap/tidb v1.1.0-beta.0.20200424154252-5ede18f10eed + github.com/pingcap/parser v0.0.0-20200507022230-f3bf29096657 + github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200508085806-feb738d721fb + github.com/pingcap/tidb v1.1.0-beta.0.20200509133407-a9dc72cf2558 github.com/pingcap/tidb-tools v4.0.0-rc.1.0.20200421113014-507d2bb3a15e+incompatible github.com/pingcap/tipb v0.0.0-20200417094153-7316d94df1ee - github.com/prometheus/client_golang v1.0.0 - github.com/prometheus/common v0.4.1 - github.com/sirupsen/logrus v1.4.2 - github.com/spf13/cobra v0.0.5 + github.com/prometheus/client_golang v1.5.1 + github.com/prometheus/common v0.9.1 + github.com/sirupsen/logrus v1.6.0 + github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 - github.com/syndtr/goleveldb v1.0.1-0.20190625010220-02440ea7a285 // indirect - github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 // indirect go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 - go.opencensus.io v0.22.2 // indirect go.uber.org/multierr v1.5.0 - go.uber.org/zap v1.14.1 - golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 - google.golang.org/api v0.14.0 - google.golang.org/grpc v1.25.1 + go.uber.org/zap v1.15.0 + golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6 + google.golang.org/api v0.15.1 + google.golang.org/grpc v1.26.0 ) diff --git a/go.sum b/go.sum index d30cc5bf6..ecbb93537 100644 --- a/go.sum +++ b/go.sum @@ -8,14 +8,23 @@ cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxK cloud.google.com/go v0.45.1/go.mod 
h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3 h1:AVXDdKsrtX33oR9fbCMu/+c1o8Ofjq6Ku/MInaLVg5Y= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0 h1:0E3eE8MX426vUOs7aHfI7aN1BrIzzzf4ccKCSfSjGmc= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go/bigquery v1.0.1 h1:hL+ycaJpVE9M7nLoiXb/Pn10ENE2u+oddxbD8uu0ZVU= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/datastore v1.0.0 h1:Kt+gOPPp2LEPWp8CSfxhsM8ik9CcyE/gYu+0r+RnZvM= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/pubsub v1.0.1 h1:W9tAK3E57P75u0XLLR82LZyw8VpAnhmyTOxW9qzmyj8= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0 h1:9/vpR43S4aJaROxqQHQ3nH9lfyKKV0dC3vOmnw8ebQQ= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.4.0 h1:KDdqY5VTXBTqpSbctVTt0mVvfanP6JZzNzLE0qNY100= cloud.google.com/go/storage v1.4.0/go.mod h1:ZusYJWlOshgSBGbt6K3GnB3MT3H1xs2id9+TCl4fDBA= +cloud.google.com/go/storage v1.5.0 h1:RPUcBvDeYgQFMfQu1eBMq6piD1SXmLH+vK3qjewZPus= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -36,31 +45,44 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4 h1:Hs82Z41s6SdL1CELW+XaDYmOH4hkBN4/N9og/AsOv7E= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/appleboy/gin-jwt/v2 v2.6.3/go.mod h1:MfPYA4ogzvOcVkRwAxT7quHOtQmVKDpTwxyUrC2DNw0= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aws/aws-sdk-go v1.26.1 h1:JGQggXhOiNJIqsmbYUl3cYtJZUffeOWlHtxfzGK7WPI= github.com/aws/aws-sdk-go v1.26.1/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/aws/aws-sdk-go v1.30.24 h1:y3JPD51VuEmVqN3BEDVm4amGpDma2cKJcDPuAU1OR58= +github.com/aws/aws-sdk-go v1.30.24/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 
github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/blacktear23/go-proxyprotocol v0.0.0-20180807104634-af7a81e8dd0d h1:rQlvB2AYWme2bIB18r/SipGiMEVJYE9U0z+MGoU/LtQ= github.com/blacktear23/go-proxyprotocol v0.0.0-20180807104634-af7a81e8dd0d/go.mod h1:VKt7CNAQxpFpSDz3sXyj9hY/GbVsQCr0sB3w59nE7lU= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cheggaaa/pb/v3 v3.0.1 h1:m0BngUk2LuSRYdx4fujDKNRXNDpbNCfptPfVT2m6OJY= github.com/cheggaaa/pb/v3 v3.0.1/go.mod h1:SqqeMF/pMOIu3xgGoxtPYhMNQP258xE4x/XRTYua+KU= +github.com/cheggaaa/pb/v3 v3.0.4 h1:QZEPYOj2ix6d5oEg63fbHmpolrnNiwjUsk+h74Yt4bM= +github.com/cheggaaa/pb/v3 v3.0.4/go.mod h1:7rgWxLrAUcFMkvJuv09+DYi7mMUYi8nO9iOWcvGJPfw= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20171208011716-f6d7a1f6fbf3/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible h1:jFneRYjIvLMLhDLCzuTuU4rSJUjRplcJQ7pD7MnhC04= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0 h1:3Jm3tLmsgAYcjC+4Up7hJrFBPr+n7rAqYeSw/SZazuY= @@ -70,13 +92,16 @@ github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20181031085051-9002847aa142 h1:3jFq2xL4ZajGK4aZY8jz+DAF0FHjI51BXjjSwCzS1Dk= github.com/coreos/go-systemd v0.0.0-20181031085051-9002847aa142/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd 
v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/corona10/goimagehash v1.0.2/go.mod h1:/l9umBhvcHQXVtQO1V6Gp1yD20STawkhRnnX0D1bvVI= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/cznic/golex v0.0.0-20181122101858-9c343928389c/go.mod h1:+bmmJDNmKlhWNG+gwWCkaBoTy39Fs+bzRxVBzoTQbIc= github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso= @@ -97,6 +122,7 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -108,6 +134,7 @@ github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZi github.com/elazarl/go-bindata-assetfs v1.0.0 h1:G/bYguwHIzWq9ZoyUQqrjTmJbbYn3j3CKKpKinvZLFk= github.com/elazarl/go-bindata-assetfs v1.0.0/go.mod h1:v+YaWX3bdea5J/mo8dSETolEo7R71Vk1u8bnjau5yw4= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5/go.mod h1:a2zkGnVExMxdzMo3M0Hi/3sEU+cWnZpSni0O6/Yb/P0= github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= @@ -115,11 +142,15 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= +github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 
github.com/fsouza/fake-gcs-server v1.15.0 h1:ss/ztlt10Y64A5qslmxZKsiqW/i28t5DkRtv6qSFaLQ= github.com/fsouza/fake-gcs-server v1.15.0/go.mod h1:HNxAJ/+FY/XSsxuwz8iIYdp2GtMmPbJ8WQjjGMxd6Qk= +github.com/fsouza/fake-gcs-server v1.17.0 h1:OeH75kBZcZa3ZE+zz/mFdJ2btt9FgqfjI7gIh9+5fvk= +github.com/fsouza/fake-gcs-server v1.17.0/go.mod h1:D1rTE4YCyHFNa99oyJJ5HyclvN/0uQR+pM/VdlL83bw= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= github.com/gin-contrib/gzip v0.0.1/go.mod h1:fGBJBCdt6qCZuCAOwWuFhBB4OOq9EFqlo5dEaFhhu5w= github.com/gin-contrib/sse v0.0.0-20170109093832-22d885f9ecc7/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= @@ -131,8 +162,11 @@ github.com/gin-gonic/gin v1.5.0/go.mod h1:Nd6IXA8m5kNZdNEHMBd93KT+mdY3+bewLgRvmC github.com/go-bindata/go-bindata/v3 v3.1.3/go.mod h1:1/zrpXsLD8YDIbhZRqXzm1Ghc7NhEvIN9+Z6R5/xH4I= github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI= github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= @@ -157,19 +191,26 @@ github.com/go-playground/universal-translator v0.16.0/go.mod h1:1AnU7NaIRDWWzGEK github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= +github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/goccy/go-graphviz v0.0.5/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls= github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b 
h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 h1:ZgQEtGgCBiWRM39fZuwSd1LwSqqSW0hOdXCYYDX0R3I= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1 h1:qGJ6qTW+x6xX/my+8YUVl4WNpX9B7+/l2tRsHGZ7f2s= @@ -193,6 +234,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -200,6 +243,8 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190930153522-6ce02741cba3 h1:3CYI9xg87xNAD+es02gZxbX/ky4KQeoFBsNOzuoAQZg= github.com/google/pprof v0.0.0-20190930153522-6ce02741cba3/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20200407044318-7d83b28da2e9 h1:K+lX49/3eURCE1IjlaZN//u6c+9nfDAMnyQ9E2dsJbY= +github.com/google/pprof v0.0.0-20200407044318-7d83b28da2e9/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf/go.mod h1:RpwtwJQFrIEPstU94h88MWPXP2ektJZ8cZ0YntAmXiE= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -208,16 +253,20 @@ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg= +github.com/gorilla/handlers v1.4.2/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= 
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.2.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5 h1:UImYN5qQ8tuGpGE16ZmjvcTtTw24zw1QAp/SlnNrZhI= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.12.1 h1:zCy2xE9ablevUOrUZc3Dl72Dt+ya2FNAvC2yLYMHzi4= @@ -232,13 +281,18 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/hypnoglow/gormzap v0.3.0/go.mod h1:5Wom8B7Jl2oK0Im9hs6KQ+Kl92w4Y7gKCrj66rhyvw0= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jinzhu/gorm v1.9.12/go.mod h1:vhTjlKSJUTWNtcbQtrMBFCxy7eXTzeCAzfL5fBZT/Qs= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/jinzhu/now v1.0.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc= +github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -251,6 +305,8 @@ github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGn github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report 
v0.0.0-20190106144839-af01ea7f8024 h1:rBMNdlhTLzJjJSDIjNEXX1Pz3Hmwmz91v+zycvx9PJc= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1 h1:6QPYqodiu3GuPL+7mfx+NwDdp2eTkp9IfEUpgAwUN0o= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY= github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -267,6 +323,8 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGi github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -292,6 +350,7 @@ github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= @@ -321,11 +380,13 @@ github.com/montanaflynn/stats v0.0.0-20180911141734-db72e6cae808/go.mod h1:wL8QJ github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/ngaut/pools v0.0.0-20180318154953-b7bc8c42aac7 h1:7KAv7KMGTTqSmYZtNdcNTgsos+vFzULLwyElndwn+5c= github.com/ngaut/pools v0.0.0-20180318154953-b7bc8c42aac7/go.mod h1:iWMfgwqYW+e8n5lC/jjNEhwcjbRDpl5NT7n2h+4UNcI= github.com/ngaut/sync2 v0.0.0-20141008032647-7a24ed77b2ef h1:K0Fn+DoFqNqktdZtdV3bPQ/0cuYh2H4rkg0tytX/07k= github.com/ngaut/sync2 v0.0.0-20141008032647-7a24ed77b2ef/go.mod h1:7WjlapSfwQyo6LNmIvEWzsW1hbBQfpUO4JWnuQRmva8= github.com/nicksnyder/go-i18n v1.10.0/go.mod h1:HrK7VCrbOvQoUAQ7Vpy7i87N7JZZZ7R2xBGjv0j365Q= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod 
h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw= @@ -342,12 +403,17 @@ github.com/opentracing/basictracer-go v1.0.0 h1:YyUAhaEfjoWXclZVJ9sGoNct7j4TVk7l github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg= github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.3.0 h1:e5+lF2E4Y2WCIxBefVowBuB0iHrUH4HZ8q+6mGF7fJc= github.com/pelletier/go-toml v1.3.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= +github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d/go.mod h1:lXfE4PvvTW5xOjO6Mba8zDPyw8M93B6AQ7frTGnMlA8= github.com/pingcap-incubator/tidb-dashboard v0.0.0-20200407064406-b2b8ad403d01/go.mod h1:77fCh8d3oKzC5ceOJWeZXAS/mLzVgdZ7rKniwmOyFuo= +github.com/pingcap-incubator/tidb-dashboard v0.0.0-20200424032813-662fb05948e7/go.mod h1:DwotQY91ds7eiwnY6QrQX9t5iimDtsJVIjI9tZYMaUM= +github.com/pingcap/br v0.0.0-20200426093517-dd11ae28b885/go.mod h1:4w3meMnk7HDNpNgjuRAxavruTeKJvUiXxoEWTjzXPnA= github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FIw034Au6seQ2fY9NEILmNh/UlQg= github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= github.com/pingcap/check v0.0.0-20191107115940-caf2b9e6ccf4 h1:iRtOAQ6FXkY/BGvst3CDfTva4nTqh6CL8WXvanLdbu0= @@ -376,19 +442,28 @@ github.com/pingcap/kvproto v0.0.0-20200417092353-efbe03bcffbd/go.mod h1:IOdRDPLy github.com/pingcap/kvproto v0.0.0-20200420075417-e0c6e8842f22/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= github.com/pingcap/kvproto v0.0.0-20200423020121-038e31959c2a h1:FtuXja79nBFAeg9LxF9+9EzoJXi+j9pCReARPOePiGs= github.com/pingcap/kvproto v0.0.0-20200423020121-038e31959c2a/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20200424032552-6650270c39c3/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20200509065137-6a4d5c264a8b h1:lzOE8Z3TnbgpK8d1h4Jze/SKJrQ7gHgLeUVy0vi0l6o= +github.com/pingcap/kvproto v0.0.0-20200509065137-6a4d5c264a8b/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 h1:AJD9pZYm72vMgPcQDww9rkZ1DnWfl0pXV3BOWlkYIjA= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd h1:CV3VsP3Z02MVtdpTMfEgRJ4T9NGgGTxdHpJerent7rM= github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/parser v0.0.0-20200424075042-8222d8b724a4 h1:1dFbm4zVXWvdwjEdyjMlu1PCxnlxK+JfNq+HhbGjDtc= github.com/pingcap/parser v0.0.0-20200424075042-8222d8b724a4/go.mod 
h1:9v0Edh8IbgjGYW2ArJr19E+bvL8zKahsFp+ixWeId+4= +github.com/pingcap/parser v0.0.0-20200507022230-f3bf29096657 h1:2ceTso30kmgMeddZ4iZ6zrK8N9eFF8zmCa1hSSE1tXc= +github.com/pingcap/parser v0.0.0-20200507022230-f3bf29096657/go.mod h1:9v0Edh8IbgjGYW2ArJr19E+bvL8zKahsFp+ixWeId+4= github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2 h1:JTzYYukREvxVSKW/ncrzNjFitd8snoQ/Xz32pw8i+s8= github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2/go.mod h1:s+utZtXDznOiL24VK0qGmtoHjjXNsscJx3m1n8cC56s= +github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200508085806-feb738d721fb h1:/9zzZH7sbMJcplGqVHUN3pqt2YUqclImLqRpyMklURc= +github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200508085806-feb738d721fb/go.mod h1:lEf22fUhISqUo2/TCJL7fTfFgG+VXjOfDCd2s8q0BRU= github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/sysutil v0.0.0-20200408114249-ed3bd6f7fdb1 h1:PI8YpTl45F8ilNkrPtT4IdbcZB1SCEa+gK/U5GJYl3E= github.com/pingcap/sysutil v0.0.0-20200408114249-ed3bd6f7fdb1/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/tidb v1.1.0-beta.0.20200424154252-5ede18f10eed h1:5mKxIq52K8pZaoKay6K6MLWIcOza8ok+kEyzOk4jYvU= github.com/pingcap/tidb v1.1.0-beta.0.20200424154252-5ede18f10eed/go.mod h1:m2VDlJDbUeHPCXAfKPajqLmB1uLvWpkKk3zALNqDYdw= +github.com/pingcap/tidb v1.1.0-beta.0.20200509133407-a9dc72cf2558 h1:5SRb0TBMxODjCC/7QnH69IJfRyB3K7PDIGcGN/XU3z8= +github.com/pingcap/tidb v1.1.0-beta.0.20200509133407-a9dc72cf2558/go.mod h1:cXNbVSQAkwwmjFQmEnEPI00Z2/Y/KOhouttUPERiInE= github.com/pingcap/tidb-tools v4.0.0-beta.1.0.20200306084441-875bd09aa3d5+incompatible h1:84F7MFMfdAYObrznvRslmVu43aoihrlL+7mMyMlOi0o= github.com/pingcap/tidb-tools v4.0.0-beta.1.0.20200306084441-875bd09aa3d5+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM= github.com/pingcap/tidb-tools v4.0.0-rc.1.0.20200421113014-507d2bb3a15e+incompatible h1:+K5bqDYG5HT+GqLdx4GH5VmS84+xHgpHbGg6Xt6qQec= @@ -404,18 +479,31 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.5.1 h1:bdHYieyGlH+6OLEk2YQha8THib30KP0/yD0YH9m6xcA= +github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= 
+github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1 h1:K0MGApIoQvMw27RTdJkPbr3JZ7DNbtxQNyi5STVM6Kw= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1 h1:KOMtN28tlbam3/7ZKEYKHhKoJZYYj3gMH4uc62x7X7U= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/remyoudompheng/bigfft v0.0.0-20190728182440-6a916e37a237 h1:HQagqIiBmr8YXawX/le3+O26N+vPPC1PtjaF3mwnook= github.com/remyoudompheng/bigfft v0.0.0-20190728182440-6a916e37a237/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= @@ -424,6 +512,7 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shirou/gopsutil v2.19.10+incompatible h1:lA4Pi29JEVIQIgATSeftHSY0rMGI9CLrl2ZvDLiahto= @@ -439,6 +528,8 @@ github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= @@ -450,6 +541,8 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= 
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8= +github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -458,6 +551,7 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= @@ -466,6 +560,8 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= github.com/swaggo/http-swagger v0.0.0-20200103000832-0e9263c4b516/go.mod h1:O1lAbCgAAX/KZ80LM/OXwtWFI/5TvZlwxSg8Cq08PV0= @@ -492,14 +588,20 @@ github.com/uber-go/atomic v1.3.2 h1:Azu9lPBWRNKzYXSIwRfgRuDuS0YKsK4NFhiQv98gkxo= github.com/uber-go/atomic v1.3.2/go.mod h1:/Ct5t2lcmbJ4OSe/waGBoaVvVqtO0bmtfVNex1PFV8g= github.com/uber/jaeger-client-go v2.15.0+incompatible h1:NP3qsSqNxh8VYr956ur1N/1C1PjvOJnJykCzcD5QHbk= github.com/uber/jaeger-client-go v2.15.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= +github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM= +github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v1.5.0 h1:OHbgr8l656Ub3Fw5k9SWnBfIEwvoHQ+W2y+Aa9D1Uyo= github.com/uber/jaeger-lib v1.5.0/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= +github.com/uber/jaeger-lib v2.2.0+incompatible h1:MxZXOiR2JuoANZ3J6DE/U0kSFv/eJ/GfSYVCjK7dyaw= +github.com/uber/jaeger-lib v2.2.0+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.5-pre/go.mod h1:FwP/aQVg39TXzItUBMwnWp9T9gPQnXw4Poh4/oBQZ/0= +github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec 
v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.5-pre/go.mod h1:tULtS6Gy1AE1yCENaw4Vb//HLH5njI2tfCQDUqRd8fI= +github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/unrolled/render v0.0.0-20171102162132-65450fb6b2d3 h1:ZsIlNwu/G0zbChIZaWOeZ2TPGNmKMt46jZLXi3e8LFc= github.com/unrolled/render v0.0.0-20171102162132-65450fb6b2d3/go.mod h1:tu82oB5W2ykJRVioYsB+IQKcft7ryBr7w12qMBUPyXg= @@ -514,6 +616,7 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1: github.com/yookoala/realpath v1.0.0 h1:7OA9pj4FZd+oZDsyvXWQvjn5oBdcHRTV44PpdMSuImQ= github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= @@ -526,6 +629,7 @@ go.opencensus.io v0.22.2 h1:75k/FF0Q2YM8QYo07VPddOLBslDt1MZOdEslOHvmzAs= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.uber.org/atomic v1.3.2 h1:2Oa65PReHzfn29GpvgsYwloV9AVFHPDk8tYxt2c2tr4= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0 h1:OI5t8sDa1Or+q8AeE+yKeB/SDYioSHAgcVljj9JIETY= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk= @@ -544,6 +648,7 @@ go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= +go.uber.org/zap v1.8.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= @@ -552,6 +657,8 @@ go.uber.org/zap v1.12.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.14.1 h1:nYDKopTbvAPq/NrUVZwT15y2lpROBiLLyoRTbXOYWOo= go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= +go.uber.org/zap v1.15.0 h1:ZZCA22JRF2gQE5FoNmhmrf7jeJJ2uhqDUNRYKm8dvmM= +go.uber.org/zap v1.15.0/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -563,14 +670,20 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto 
v0.0.0-20191205180655-e7c4368fe9dd/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413 h1:ULYEB3JvPRE/IfO+9uO7vKV/xzVTO7XPAwm8xbf4w2g= golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72 h1:+ELyKg6m8UBf0nPFSqD0mi7zUfwPyXo23HNjMnXPz7w= +golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136 h1:A1gGSx58LAGVHUUsOf7IiR0u8Xb6W51gRwfDBhkdcaw= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299 h1:zQpM52jfKHG6II1ISZY1ZcpygvuSFZpLwfluuF89XOg= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -605,13 +718,17 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net 
v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -619,6 +736,8 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6 h1:pE8b58s1HRDMi8RDc79m0HISf9D4TzseP40cEA6IGfs= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -645,9 +764,15 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190610200419-93c9922d18ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191128015809-6d18c012aee9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= @@ -686,12 +811,15 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191030062658-86caa796c7ab/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191107010934-f79515f33823 h1:akkRBeitX2EZP59KdtKw310CI4WGPCNPyrLbE7WZA8Y= golang.org/x/tools v0.0.0-20191107010934-f79515f33823/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191114200427-caa0b0f7d508/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2 h1:EtTFh6h4SAKemS+CURDMTDIANuduG5zKEXShyy18bGA= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200325010219-a49f79bcc224/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= @@ -706,8 +834,13 @@ google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEt google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.14.0 h1:uMf5uLi4eQMRrMKhCplNik4U4H8Z6C1br3zOtAa/aDE= google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0 h1:yzlyyDW/J0w8yNFJIhiAJy4kq74S+1DOLdawELNxFMA= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.1 h1:5mMS6mYvK5LVB8+ujVBC33Y8gltBo/kT6HBm6kU80G4= +google.golang.org/api v0.15.1/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -715,6 +848,8 @@ google.golang.org/appengine v1.5.0 h1:KxkO13IPW4Lslp2bz+KHP2E3gtFlrIGNThxkZQ3g+4 google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -726,17 +861,24 @@ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98 
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9 h1:6XzpBoANz1NqMNfDXzc2QmHmbb1vyMsvRfoP5rM+K1I= google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb h1:ADPHZzpzM4tk4V4S5cnCrr5SwzvlrPRmqqCuJDB8UTs= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= gopkg.in/alecthomas/gometalinter.v2 v2.0.12/go.mod h1:NDRytsqEZyolNuAgTzJkZMkSQM7FIKyzVzGhjB/qfYo= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= @@ -765,6 +907,7 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= diff --git a/pkg/backup/client.go b/pkg/backup/client.go index 24bf5a5a9..0a22fa40e 100644 --- a/pkg/backup/client.go +++ b/pkg/backup/client.go @@ -169,7 +169,9 @@ func (bc *Client) SaveBackupMeta(ctx context.Context, ddlJobs []*model.Job) erro return bc.storage.Write(ctx, utils.MetaFile, backupMetaData) } -func buildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { +// BuildTableRanges returns the key ranges encompassing the entire table, +// and its partitions if exists. 
+func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { pis := tbl.GetPartitionInfo() if pis == nil { // Short path, no partition. @@ -202,7 +204,6 @@ func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) { kvRanges = append(kvRanges, idxRanges...) } return kvRanges, nil - } // BuildBackupRangeAndSchema gets the range and schema of tables. @@ -285,7 +286,7 @@ func BuildBackupRangeAndSchema( } backupSchemas.pushPending(schema, dbInfo.Name.L, tableInfo.Name.L) - tableRanges, err := buildTableRanges(tableInfo) + tableRanges, err := BuildTableRanges(tableInfo) if err != nil { return nil, nil, err } @@ -897,7 +898,7 @@ func (bc *Client) CollectChecksums() ([]Checksum, error) { // CompleteMeta wait response of admin checksum from TiDB to complete backup meta. func (bc *Client) CompleteMeta(backupSchemas *Schemas) error { - schemas, err := backupSchemas.finishTableChecksum() + schemas, err := backupSchemas.FinishTableChecksum() if err != nil { return err } diff --git a/pkg/backup/client_test.go b/pkg/backup/client_test.go index 63f3d5d5f..e649b6dcc 100644 --- a/pkg/backup/client_test.go +++ b/pkg/backup/client_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package backup +package backup_test import ( "context" @@ -10,12 +10,14 @@ import ( . "github.com/pingcap/check" "github.com/pingcap/parser/model" + pd "github.com/pingcap/pd/v4/client" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/store/mockstore/mocktikv" "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/br/pkg/backup" "github.com/pingcap/br/pkg/conn" ) @@ -23,7 +25,8 @@ type testBackup struct { ctx context.Context cancel context.CancelFunc - backupClient *Client + mockPDClient pd.Client + backupClient *backup.Client } var _ = Suite(&testBackup{}) @@ -33,15 +36,14 @@ func TestT(t *testing.T) { } func (r *testBackup) SetUpSuite(c *C) { - mockPDClient := mocktikv.NewPDClient(mocktikv.NewCluster()) + r.mockPDClient = mocktikv.NewPDClient(mocktikv.NewCluster()) r.ctx, r.cancel = context.WithCancel(context.Background()) mockMgr := &conn.Mgr{} - mockMgr.SetPDClient(mockPDClient) + mockMgr.SetPDClient(r.mockPDClient) mockMgr.SetPDHTTP([]string{"test"}, nil) - r.backupClient = &Client{ - clusterID: mockPDClient.GetClusterID(r.ctx), - mgr: mockMgr, - } + var err error + r.backupClient, err = backup.NewBackupClient(r.ctx, mockMgr) + c.Assert(err, IsNil) } func (r *testBackup) TestGetTS(c *C) { @@ -81,10 +83,10 @@ func (r *testBackup) TestGetTS(c *C) { c.Assert(err, ErrorMatches, "backup ts overflow.*") // timeago = "10h" exceed GCSafePoint - p, l, err := r.backupClient.mgr.GetPDClient().GetTS(r.ctx) + p, l, err := r.mockPDClient.GetTS(r.ctx) c.Assert(err, IsNil) now := oracle.ComposeTS(p, l) - _, err = r.backupClient.mgr.GetPDClient().UpdateGCSafePoint(r.ctx, now) + _, err = r.mockPDClient.UpdateGCSafePoint(r.ctx, now) c.Assert(err, IsNil) _, err = r.backupClient.GetTS(r.ctx, 10*time.Hour, 0) c.Assert(err, ErrorMatches, "GC safepoint [0-9]+ exceed TS [0-9]+") @@ -94,7 +96,6 @@ func (r *testBackup) TestGetTS(c *C) { ts, err = r.backupClient.GetTS(r.ctx, time.Minute, backupts) c.Assert(err, IsNil) c.Assert(ts, Equals, backupts) - } func (r *testBackup) TestBuildTableRange(c *C) { @@ -125,16 +126,15 @@ func (r *testBackup) TestBuildTableRange(c *C) { tbl.Partition.Definitions = append(tbl.Partition.Definitions, model.PartitionDefinition{ID: id}) } - ranges, err := buildTableRanges(tbl) + 
ranges, err := backup.BuildTableRanges(tbl) c.Assert(err, IsNil) c.Assert(ranges, DeepEquals, cs.trs) } tbl := &model.TableInfo{ID: 7} - ranges, err := buildTableRanges(tbl) + ranges, err := backup.BuildTableRanges(tbl) c.Assert(err, IsNil) c.Assert(ranges, DeepEquals, []kv.KeyRange{ {StartKey: tablecodec.EncodeRowKey(7, low), EndKey: tablecodec.EncodeRowKey(7, high)}, }) - } diff --git a/pkg/backup/metrics.go b/pkg/backup/metrics.go index 67d5fe1e5..426fe2f56 100644 --- a/pkg/backup/metrics.go +++ b/pkg/backup/metrics.go @@ -25,7 +25,7 @@ var ( }) ) -func init() { +func init() { // nolint:gochecknoinits prometheus.MustRegister(backupRegionCounters) prometheus.MustRegister(backupRegionHistogram) } diff --git a/pkg/backup/safe_point_test.go b/pkg/backup/safe_point_test.go index 6ded3d0ab..f48c8ff56 100644 --- a/pkg/backup/safe_point_test.go +++ b/pkg/backup/safe_point_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package backup +package backup_test import ( "context" @@ -10,6 +10,7 @@ import ( pd "github.com/pingcap/pd/v4/client" "github.com/pingcap/tidb/util/testleak" + "github.com/pingcap/br/pkg/backup" "github.com/pingcap/br/pkg/mock" ) @@ -36,19 +37,19 @@ func (s *testSafePointSuite) TestCheckGCSafepoint(c *C) { ctx := context.Background() pdClient := &mockSafePoint{Client: s.mock.PDClient, safepoint: 2333} { - err := CheckGCSafePoint(ctx, pdClient, 2333+1) + err := backup.CheckGCSafePoint(ctx, pdClient, 2333+1) c.Assert(err, IsNil) } { - err := CheckGCSafePoint(ctx, pdClient, 2333) + err := backup.CheckGCSafePoint(ctx, pdClient, 2333) c.Assert(err, NotNil) } { - err := CheckGCSafePoint(ctx, pdClient, 2333-1) + err := backup.CheckGCSafePoint(ctx, pdClient, 2333-1) c.Assert(err, NotNil) } { - err := CheckGCSafePoint(ctx, pdClient, 0) + err := backup.CheckGCSafePoint(ctx, pdClient, 0) c.Assert(err, ErrorMatches, "GC safepoint 2333 exceed TS 0") } } diff --git a/pkg/backup/schema.go b/pkg/backup/schema.go index 2b9def22c..edcd2c293 100644 --- a/pkg/backup/schema.go +++ b/pkg/backup/schema.go @@ -29,7 +29,7 @@ const ( DefaultSchemaConcurrency = 64 ) -// Schemas is task for backuping schemas +// Schemas is task for backuping schemas. type Schemas struct { // name -> schema schemas map[string]backup.Schema @@ -110,7 +110,8 @@ func (pending *Schemas) Start( }() } -func (pending *Schemas) finishTableChecksum() ([]*backup.Schema, error) { +// FinishTableChecksum waits until all schemas' checksums are verified. +func (pending *Schemas) FinishTableChecksum() ([]*backup.Schema, error) { schemas := make([]*backup.Schema, 0, len(pending.schemas)) for { select { diff --git a/pkg/backup/schema_test.go b/pkg/backup/schema_test.go index 98173dd55..06584a6b5 100644 --- a/pkg/backup/schema_test.go +++ b/pkg/backup/schema_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
-package backup +package backup_test import ( "context" @@ -12,6 +12,7 @@ import ( "github.com/pingcap/tidb/util/testkit" "github.com/pingcap/tidb/util/testleak" + "github.com/pingcap/br/pkg/backup" "github.com/pingcap/br/pkg/mock" ) @@ -60,7 +61,7 @@ func (s *testBackupSchemaSuite) TestBuildBackupRangeAndSchema(c *C) { DoTables: []*filter.Table{{Schema: "test", Name: "t1"}}, }) c.Assert(err, IsNil) - _, backupSchemas, err := BuildBackupRangeAndSchema( + _, backupSchemas, err := backup.BuildBackupRangeAndSchema( s.mock.Domain, s.mock.Storage, testFilter, math.MaxUint64) c.Assert(err, IsNil) c.Assert(backupSchemas, IsNil) @@ -70,7 +71,7 @@ func (s *testBackupSchemaSuite) TestBuildBackupRangeAndSchema(c *C) { DoTables: []*filter.Table{{Schema: "foo", Name: "t1"}}, }) c.Assert(err, IsNil) - _, backupSchemas, err = BuildBackupRangeAndSchema( + _, backupSchemas, err = backup.BuildBackupRangeAndSchema( s.mock.Domain, s.mock.Storage, fooFilter, math.MaxUint64) c.Assert(err, IsNil) c.Assert(backupSchemas, IsNil) @@ -78,7 +79,7 @@ func (s *testBackupSchemaSuite) TestBuildBackupRangeAndSchema(c *C) { // Empty database. noFilter, err := filter.New(false, &filter.Rules{}) c.Assert(err, IsNil) - _, backupSchemas, err = BuildBackupRangeAndSchema( + _, backupSchemas, err = backup.BuildBackupRangeAndSchema( s.mock.Domain, s.mock.Storage, noFilter, math.MaxUint64) c.Assert(err, IsNil) c.Assert(backupSchemas, IsNil) @@ -88,13 +89,13 @@ func (s *testBackupSchemaSuite) TestBuildBackupRangeAndSchema(c *C) { tk.MustExec("create table t1 (a int);") tk.MustExec("insert into t1 values (10);") - _, backupSchemas, err = BuildBackupRangeAndSchema( + _, backupSchemas, err = backup.BuildBackupRangeAndSchema( s.mock.Domain, s.mock.Storage, testFilter, math.MaxUint64) c.Assert(err, IsNil) c.Assert(backupSchemas.Len(), Equals, 1) updateCh := new(simpleProgress) backupSchemas.Start(context.Background(), s.mock.Storage, math.MaxUint64, 1, updateCh) - schemas, err := backupSchemas.finishTableChecksum() + schemas, err := backupSchemas.FinishTableChecksum() c.Assert(updateCh.get(), Equals, int64(1)) c.Assert(err, IsNil) c.Assert(len(schemas), Equals, 1) @@ -108,13 +109,13 @@ func (s *testBackupSchemaSuite) TestBuildBackupRangeAndSchema(c *C) { tk.MustExec("insert into t2 values (10);") tk.MustExec("insert into t2 values (11);") - _, backupSchemas, err = BuildBackupRangeAndSchema( + _, backupSchemas, err = backup.BuildBackupRangeAndSchema( s.mock.Domain, s.mock.Storage, noFilter, math.MaxUint64) c.Assert(err, IsNil) c.Assert(backupSchemas.Len(), Equals, 2) updateCh.reset() backupSchemas.Start(context.Background(), s.mock.Storage, math.MaxUint64, 2, updateCh) - schemas, err = backupSchemas.finishTableChecksum() + schemas, err = backupSchemas.FinishTableChecksum() c.Assert(updateCh.get(), Equals, int64(2)) c.Assert(err, IsNil) c.Assert(len(schemas), Equals, 2) diff --git a/pkg/checksum/executor.go b/pkg/checksum/executor.go index a56aa95d6..4860b02a0 100644 --- a/pkg/checksum/executor.go +++ b/pkg/checksum/executor.go @@ -6,6 +6,7 @@ import ( "context" "log" + "github.com/gogo/protobuf/proto" "github.com/pingcap/errors" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/distsql" @@ -240,7 +241,7 @@ func updateChecksumResponse(resp, update *tipb.ChecksumResponse) { resp.TotalBytes += update.TotalBytes } -// Executor is a checksum executor +// Executor is a checksum executor. 
type Executor struct { reqs []*kv.Request } @@ -250,6 +251,20 @@ func (exec *Executor) Len() int { return len(exec.reqs) } +// RawRequests extracts the raw requests associated with this executor. +// This is mainly used for debugging only. +func (exec *Executor) RawRequests() ([]*tipb.ChecksumRequest, error) { + res := make([]*tipb.ChecksumRequest, 0, len(exec.reqs)) + for _, req := range exec.reqs { + rawReq := new(tipb.ChecksumRequest) + if err := proto.Unmarshal(req.Data, rawReq); err != nil { + return nil, err + } + res = append(res, rawReq) + } + return res, nil +} + // Execute executes a checksum executor. func (exec *Executor) Execute( ctx context.Context, diff --git a/pkg/checksum/executor_test.go b/pkg/checksum/executor_test.go index 43c90761d..ec78f8051 100644 --- a/pkg/checksum/executor_test.go +++ b/pkg/checksum/executor_test.go @@ -1,19 +1,18 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package checksum +package checksum_test import ( "context" "math" "testing" - "github.com/gogo/protobuf/proto" . "github.com/pingcap/check" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/util/testkit" "github.com/pingcap/tidb/util/testleak" - "github.com/pingcap/tipb/go-tipb" + "github.com/pingcap/br/pkg/checksum" "github.com/pingcap/br/pkg/mock" "github.com/pingcap/br/pkg/utils" ) @@ -59,9 +58,9 @@ func (s *testChecksumSuite) TestChecksum(c *C) { tk.MustExec("create table t1 (a int);") tk.MustExec("insert into t1 values (10);") tableInfo1 := s.getTableInfo(c, "test", "t1") - exe1, err := NewExecutorBuilder(tableInfo1, math.MaxUint64).Build() + exe1, err := checksum.NewExecutorBuilder(tableInfo1, math.MaxUint64).Build() c.Assert(err, IsNil) - c.Assert(len(exe1.reqs), Equals, 1) + c.Assert(exe1.Len(), Equals, 1) resp, err := exe1.Execute(context.TODO(), s.mock.Storage.GetClient(), func() {}) c.Assert(err, IsNil) // Cluster returns a dummy checksum (all fields are 1). @@ -74,9 +73,9 @@ func (s *testChecksumSuite) TestChecksum(c *C) { tk.MustExec("alter table t2 add index i2(a);") tk.MustExec("insert into t2 values (10);") tableInfo2 := s.getTableInfo(c, "test", "t2") - exe2, err := NewExecutorBuilder(tableInfo2, math.MaxUint64).Build() + exe2, err := checksum.NewExecutorBuilder(tableInfo2, math.MaxUint64).Build() c.Assert(err, IsNil) - c.Assert(len(exe2.reqs), Equals, 2, Commentf("%v", tableInfo2)) + c.Assert(exe2.Len(), Equals, 2, Commentf("%v", tableInfo2)) resp2, err := exe2.Execute(context.TODO(), s.mock.Storage.GetClient(), func() {}) c.Assert(err, IsNil) c.Assert(resp2.Checksum, Equals, uint64(0), Commentf("%v", resp2)) @@ -87,18 +86,16 @@ func (s *testChecksumSuite) TestChecksum(c *C) { tk.MustExec("alter table t1 add index i2(a);") tableInfo1 = s.getTableInfo(c, "test", "t1") oldTable := utils.Table{Info: tableInfo1} - exe2, err = NewExecutorBuilder(tableInfo2, math.MaxUint64). + exe2, err = checksum.NewExecutorBuilder(tableInfo2, math.MaxUint64). 
SetOldTable(&oldTable).Build() c.Assert(err, IsNil) - c.Assert(len(exe2.reqs), Equals, 2) - req := tipb.ChecksumRequest{} - err = proto.Unmarshal(exe2.reqs[0].Data, &req) + c.Assert(exe2.Len(), Equals, 2) + rawReqs, err := exe2.RawRequests() c.Assert(err, IsNil) - c.Assert(req.Rule, NotNil) - req = tipb.ChecksumRequest{} - err = proto.Unmarshal(exe2.reqs[1].Data, &req) - c.Assert(err, IsNil) - c.Assert(req.Rule, NotNil) + c.Assert(rawReqs, HasLen, 2) + for _, rawReq := range rawReqs { + c.Assert(rawReq.Rule, NotNil) + } resp2, err = exe2.Execute(context.TODO(), s.mock.Storage.GetClient(), func() {}) c.Assert(err, IsNil) c.Assert(resp2, NotNil) diff --git a/pkg/mock/mock_cluster_test.go b/pkg/mock/mock_cluster_test.go index 1db0f5a8c..2ca00923c 100644 --- a/pkg/mock/mock_cluster_test.go +++ b/pkg/mock/mock_cluster_test.go @@ -1,21 +1,29 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package mock +package mock_test import ( + "testing" + . "github.com/pingcap/check" "github.com/pingcap/tidb/util/testleak" + + "github.com/pingcap/br/pkg/mock" ) +func Test(t *testing.T) { + TestingT(t) +} + var _ = Suite(&testClusterSuite{}) type testClusterSuite struct { - mock *Cluster + mock *mock.Cluster } func (s *testClusterSuite) SetUpSuite(c *C) { var err error - s.mock, err = NewCluster() + s.mock, err = mock.NewCluster() c.Assert(err, IsNil) } diff --git a/pkg/restore/backoff.go b/pkg/restore/backoff.go index a84014c11..77b57b3e7 100644 --- a/pkg/restore/backoff.go +++ b/pkg/restore/backoff.go @@ -13,13 +13,24 @@ import ( ) var ( - errEpochNotMatch = errors.NewNoStackError("epoch not match") - errKeyNotInRegion = errors.NewNoStackError("key not in region") - errRewriteRuleNotFound = errors.NewNoStackError("rewrite rule not found") - errRangeIsEmpty = errors.NewNoStackError("range is empty") - errGrpc = errors.NewNoStackError("gRPC error") - errDownloadFailed = errors.NewNoStackError("download sst failed") - errIngestFailed = errors.NewNoStackError("ingest sst failed") + // ErrEpochNotMatch is the error raised when ingestion failed with "epoch + // not match". This error is retryable. + ErrEpochNotMatch = errors.NewNoStackError("epoch not match") + // ErrKeyNotInRegion is the error raised when ingestion failed with "key not + // in region". This error cannot be retried. + ErrKeyNotInRegion = errors.NewNoStackError("key not in region") + // ErrRewriteRuleNotFound is the error raised when download failed with + // "rewrite rule not found". This error cannot be retried + ErrRewriteRuleNotFound = errors.NewNoStackError("rewrite rule not found") + // ErrRangeIsEmpty is the error raised when download failed with "range is + // empty". This error cannot be retried. + ErrRangeIsEmpty = errors.NewNoStackError("range is empty") + // ErrGRPC indicates any gRPC communication error. This error can be retried. + ErrGRPC = errors.NewNoStackError("gRPC error") + // ErrDownloadFailed indicates a generic, non-retryable download error. + ErrDownloadFailed = errors.NewNoStackError("download sst failed") + // ErrIngestFailed indicates a generic, retryable ingest error. + ErrIngestFailed = errors.NewNoStackError("ingest sst failed") ) const ( @@ -42,28 +53,29 @@ type importerBackoffer struct { maxDelayTime time.Duration } -func newImportSSTBackoffer() utils.Backoffer { +// NewBackoffer creates a new controller regulating a truncated exponential backoff. 
+func NewBackoffer(attempt int, delayTime, maxDelayTime time.Duration) utils.Backoffer { return &importerBackoffer{ - attempt: importSSTRetryTimes, - delayTime: importSSTWaitInterval, - maxDelayTime: importSSTMaxWaitInterval, + attempt: attempt, + delayTime: delayTime, + maxDelayTime: maxDelayTime, } } +func newImportSSTBackoffer() utils.Backoffer { + return NewBackoffer(importSSTRetryTimes, importSSTWaitInterval, importSSTMaxWaitInterval) +} + func newDownloadSSTBackoffer() utils.Backoffer { - return &importerBackoffer{ - attempt: downloadSSTRetryTimes, - delayTime: downloadSSTWaitInterval, - maxDelayTime: downloadSSTMaxWaitInterval, - } + return NewBackoffer(downloadSSTRetryTimes, downloadSSTWaitInterval, downloadSSTMaxWaitInterval) } func (bo *importerBackoffer) NextBackoff(err error) time.Duration { switch errors.Cause(err) { - case errGrpc, errEpochNotMatch, errIngestFailed: + case ErrGRPC, ErrEpochNotMatch, ErrIngestFailed: bo.delayTime = 2 * bo.delayTime bo.attempt-- - case errRangeIsEmpty, errRewriteRuleNotFound: + case ErrRangeIsEmpty, ErrRewriteRuleNotFound: // Excepted error, finish the operation bo.delayTime = 0 bo.attempt = 0 diff --git a/pkg/restore/backoff_test.go b/pkg/restore/backoff_test.go index a07c0839b..5ee63e885 100644 --- a/pkg/restore/backoff_test.go +++ b/pkg/restore/backoff_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package restore_test import ( "context" @@ -10,6 +10,7 @@ import ( "github.com/pingcap/tidb/util/testleak" "github.com/pingcap/br/pkg/mock" + "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/utils" ) @@ -29,33 +30,32 @@ func (s *testBackofferSuite) TearDownSuite(c *C) { testleak.AfterTest(c)() } -func (s *testBackofferSuite) TestImporterBackoffer(c *C) { +func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { var counter int + backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() switch counter { case 0: - return errGrpc + return restore.ErrGRPC case 1: - return errEpochNotMatch + return restore.ErrEpochNotMatch case 2: - return errRangeIsEmpty + return restore.ErrRangeIsEmpty } return nil - }, newImportSSTBackoffer()) + }, backoffer) c.Assert(counter, Equals, 3) - c.Assert(err, Equals, errRangeIsEmpty) - - counter = 0 - backoffer := importerBackoffer{ - attempt: 10, - delayTime: time.Nanosecond, - maxDelayTime: time.Nanosecond, - } - err = utils.WithRetry(context.Background(), func() error { + c.Assert(err, Equals, restore.ErrRangeIsEmpty) +} + +func (s *testBackofferSuite) TestBackoffWithRetryableError(c *C) { + var counter int + backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) + err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() - return errEpochNotMatch - }, &backoffer) + return restore.ErrEpochNotMatch + }, backoffer) c.Assert(counter, Equals, 10) - c.Assert(err, Equals, errEpochNotMatch) + c.Assert(err, Equals, restore.ErrEpochNotMatch) } diff --git a/pkg/restore/client.go b/pkg/restore/client.go index aa6bc3394..8518299b8 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -47,7 +47,7 @@ import ( // checksum tasks. const defaultChecksumConcurrency = 64 -// Client sends requests to restore files +// Client sends requests to restore files. 
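With NewBackoffer and the error sentinels exported above, code outside the restore package can drive retries exactly the way backoff_test.go now does. A minimal, self-contained sketch; the flaky closure is invented, but NewBackoffer, utils.WithRetry, and ErrGRPC are used with the signatures visible in this patch.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/pingcap/br/pkg/restore"
	"github.com/pingcap/br/pkg/utils"
)

func main() {
	attempts := 0
	// Up to 8 attempts; delays start at 10ms, double on retryable errors,
	// and are bounded by the one-second maxDelayTime.
	bo := restore.NewBackoffer(8, 10*time.Millisecond, time.Second)
	err := utils.WithRetry(context.Background(), func() error {
		attempts++
		if attempts < 3 {
			// ErrGRPC is one of the retryable sentinels, so WithRetry
			// backs off and calls the closure again.
			return restore.ErrGRPC
		}
		return nil
	}, bo)
	fmt.Println(attempts, err) // 3 <nil>
}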
type Client struct { ctx context.Context cancel context.CancelFunc @@ -626,7 +626,6 @@ func (rc *Client) RestoreRaw(startKey []byte, endKey []byte, files []*backup.Fil err := rc.fileImporter.SetRawRange(startKey, endKey) if err != nil { - return errors.Trace(err) } diff --git a/pkg/restore/client_test.go b/pkg/restore/client_test.go index 13b5caa0a..96425ffb4 100644 --- a/pkg/restore/client_test.go +++ b/pkg/restore/client_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package restore_test import ( "context" @@ -16,6 +16,7 @@ import ( "github.com/pingcap/br/pkg/gluetidb" "github.com/pingcap/br/pkg/mock" + "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/utils" ) @@ -39,11 +40,8 @@ func (s *testRestoreClientSuite) TestCreateTables(c *C) { c.Assert(s.mock.Start(), IsNil) defer s.mock.Stop() - client := Client{} - db, err := NewDB(gluetidb.Glue{}, s.mock.Storage) + client, err := restore.NewRestoreClient(context.Background(), gluetidb.Glue{}, s.mock.PDClient, s.mock.Storage, nil) c.Assert(err, IsNil) - client.db = db - client.ctx = context.Background() info, err := s.mock.Domain.GetSnapshotInfoSchema(math.MaxInt64) c.Assert(err, IsNil) @@ -100,11 +98,8 @@ func (s *testRestoreClientSuite) TestIsOnline(c *C) { c.Assert(s.mock.Start(), IsNil) defer s.mock.Stop() - client := Client{} - db, err := NewDB(gluetidb.Glue{}, s.mock.Storage) + client, err := restore.NewRestoreClient(context.Background(), gluetidb.Glue{}, s.mock.PDClient, s.mock.Storage, nil) c.Assert(err, IsNil) - client.db = db - client.ctx = context.Background() c.Assert(client.IsOnline(), IsFalse) client.EnableOnline() diff --git a/pkg/restore/db.go b/pkg/restore/db.go index c8c748e99..7b1cb436d 100644 --- a/pkg/restore/db.go +++ b/pkg/restore/db.go @@ -167,16 +167,14 @@ func (db *DB) AlterTiflashReplica(ctx context.Context, table *utils.Table, count zap.Stringer("db", table.Db.Name), zap.Stringer("table", table.Info.Name), zap.Error(err)) - return err } else if table.TiFlashReplicas > 0 { log.Warn("alter tiflash replica done", zap.Stringer("db", table.Db.Name), zap.Stringer("table", table.Info.Name), zap.Int("originalReplicaCount", table.TiFlashReplicas), zap.Int("replicaCount", count)) - } - return nil + return err } // Close closes the connection. diff --git a/pkg/restore/db_test.go b/pkg/restore/db_test.go index 3f77a53dd..8e1807be7 100644 --- a/pkg/restore/db_test.go +++ b/pkg/restore/db_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
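The db.go hunk above folds AlterTiflashReplica's two early returns into a single return err after the logging branches, so the success and failure paths share one exit. Schematically, and with placeholder names rather than BR's real ones, the shape it moves to is:

package main

import (
	"errors"
	"fmt"
	"log"
)

// alterReplicaSketch mirrors the refactored control flow: log on failure,
// warn on the interesting success case, and return the error exactly once.
func alterReplicaSketch(doAlter func() error, replicaCount int) error {
	err := doAlter()
	if err != nil {
		log.Printf("alter tiflash replica failed: %v", err)
	} else if replicaCount > 0 {
		log.Printf("alter tiflash replica done, count=%d", replicaCount)
	}
	return err // nil on success, the original error otherwise
}

func main() {
	fmt.Println(alterReplicaSketch(func() error { return nil }, 2))
	fmt.Println(alterReplicaSketch(func() error { return errors.New("boom") }, 2))
}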
-package restore +package restore_test import ( "context" @@ -17,6 +17,7 @@ import ( "github.com/pingcap/br/pkg/backup" "github.com/pingcap/br/pkg/gluetidb" "github.com/pingcap/br/pkg/mock" + "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/utils" ) @@ -74,7 +75,7 @@ func (s *testRestoreSchemaSuite) TestRestoreAutoIncID(c *C) { c.Assert(autoIncID, Equals, uint64(globalAutoID)) // Alter AutoIncID to the next AutoIncID + 100 table.Info.AutoIncID = globalAutoID + 100 - db, err := NewDB(gluetidb.Glue{}, s.mock.Storage) + db, err := restore.NewDB(gluetidb.Glue{}, s.mock.Storage) c.Assert(err, IsNil, Commentf("Error create DB")) tk.MustExec("drop database if exists test;") // Test empty collate value @@ -126,7 +127,7 @@ func (s *testRestoreSchemaSuite) TestFilterDDLJobs(c *C) { Db: dbInfo, Info: tableInfo.Meta(), }} - ddlJobs := FilterDDLJobs(allDDLJobs, tables) + ddlJobs := restore.FilterDDLJobs(allDDLJobs, tables) for _, job := range ddlJobs { c.Logf("get ddl job: %s", job.Query) } diff --git a/pkg/restore/import.go b/pkg/restore/import.go index 4c5f38402..2f65b4ccf 100644 --- a/pkg/restore/import.go +++ b/pkg/restore/import.go @@ -27,7 +27,7 @@ import ( const importScanRegionTime = 10 * time.Second const scanRegionPaginationLimit = int(128) -// ImporterClient is used to import a file to TiKV +// ImporterClient is used to import a file to TiKV. type ImporterClient interface { DownloadSST( ctx context.Context, @@ -204,7 +204,7 @@ func (importer *FileImporter) Import( ctx, cancel := context.WithTimeout(importer.ctx, importScanRegionTime) defer cancel() // Scan regions covered by the file range - regionInfos, errScanRegion := paginateScanRegion( + regionInfos, errScanRegion := PaginateScanRegion( ctx, importer.metaClient, startKey, endKey, scanRegionPaginationLimit) if errScanRegion != nil { return errors.Trace(errScanRegion) @@ -242,7 +242,7 @@ func (importer *FileImporter) Import( return e }, newDownloadSSTBackoffer()) if errDownload != nil { - if errDownload == errRewriteRuleNotFound || errDownload == errRangeIsEmpty { + if errDownload == ErrRewriteRuleNotFound || errDownload == ErrRangeIsEmpty { // Skip this region continue } @@ -285,7 +285,7 @@ func (importer *FileImporter) Import( zap.Stringer("newLeader", newInfo.Leader)) if !checkRegionEpoch(newInfo, info) { - errIngest = errors.AddStack(errEpochNotMatch) + errIngest = errors.AddStack(ErrEpochNotMatch) break ingestRetry } ingestResp, errIngest = importer.ingestSST(downloadMeta, newInfo) @@ -293,14 +293,14 @@ func (importer *FileImporter) Import( // TODO handle epoch not match error // 1. retry download if needed // 2. retry ingest - errIngest = errors.AddStack(errEpochNotMatch) + errIngest = errors.AddStack(ErrEpochNotMatch) break ingestRetry case errPb.KeyNotInRegion != nil: - errIngest = errors.AddStack(errKeyNotInRegion) + errIngest = errors.AddStack(ErrKeyNotInRegion) break ingestRetry default: // Other errors like `ServerIsBusy`, `RegionNotFound`, etc. 
should be retryable - errIngest = errors.Annotatef(errIngestFailed, "ingest error %s", errPb) + errIngest = errors.Annotatef(ErrIngestFailed, "ingest error %s", errPb) break ingestRetry } } @@ -345,13 +345,13 @@ func (importer *FileImporter) downloadSST( } regionRule := matchNewPrefix(key, rewriteRules) if regionRule == nil { - return nil, errors.Trace(errRewriteRuleNotFound) + return nil, errors.Trace(ErrRewriteRuleNotFound) } rule := import_sstpb.RewriteRule{ OldKeyPrefix: encodeKeyPrefix(regionRule.GetOldKeyPrefix()), NewKeyPrefix: encodeKeyPrefix(regionRule.GetNewKeyPrefix()), } - sstMeta := getSSTMetaFromFile(id, file, regionInfo.Region, &rule) + sstMeta := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule) req := &import_sstpb.DownloadRequest{ Sst: sstMeta, @@ -367,13 +367,13 @@ func (importer *FileImporter) downloadSST( for _, peer := range regionInfo.Region.GetPeers() { resp, err = importer.importClient.DownloadSST(importer.ctx, peer.GetStoreId(), req) if err != nil { - return nil, errors.Annotatef(errGrpc, "%s", err) + return nil, errors.Annotatef(ErrGRPC, "%s", err) } if resp.GetError() != nil { - return nil, errors.Annotate(errDownloadFailed, resp.GetError().GetMessage()) + return nil, errors.Annotate(ErrDownloadFailed, resp.GetError().GetMessage()) } if resp.GetIsEmpty() { - return nil, errors.Trace(errRangeIsEmpty) + return nil, errors.Trace(ErrRangeIsEmpty) } } sstMeta.Range.Start = truncateTS(resp.Range.GetStart()) @@ -391,7 +391,7 @@ func (importer *FileImporter) downloadRawKVSST( } // Empty rule var rule import_sstpb.RewriteRule - sstMeta := getSSTMetaFromFile(id, file, regionInfo.Region, &rule) + sstMeta := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule) // Cut the SST file's range to fit in the restoring range. if bytes.Compare(importer.rawStartKey, sstMeta.Range.GetStart()) > 0 { @@ -402,7 +402,7 @@ func (importer *FileImporter) downloadRawKVSST( sstMeta.Range.End = importer.rawEndKey } if bytes.Compare(sstMeta.Range.GetStart(), sstMeta.Range.GetEnd()) > 0 { - return nil, errors.Trace(errRangeIsEmpty) + return nil, errors.Trace(ErrRangeIsEmpty) } req := &import_sstpb.DownloadRequest{ @@ -419,13 +419,13 @@ func (importer *FileImporter) downloadRawKVSST( for _, peer := range regionInfo.Region.GetPeers() { resp, err = importer.importClient.DownloadSST(importer.ctx, peer.GetStoreId(), req) if err != nil { - return nil, errors.Annotatef(errGrpc, "%s", err) + return nil, errors.Annotatef(ErrGRPC, "%s", err) } if resp.GetError() != nil { - return nil, errors.Annotate(errDownloadFailed, resp.GetError().GetMessage()) + return nil, errors.Annotate(ErrDownloadFailed, resp.GetError().GetMessage()) } if resp.GetIsEmpty() { - return nil, errors.Trace(errRangeIsEmpty) + return nil, errors.Trace(ErrRangeIsEmpty) } } sstMeta.Range.Start = resp.Range.GetStart() diff --git a/pkg/restore/range.go b/pkg/restore/range.go index 72f530525..0e016dbde 100644 --- a/pkg/restore/range.go +++ b/pkg/restore/range.go @@ -3,6 +3,8 @@ package restore import ( + "bytes" + "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" @@ -13,8 +15,8 @@ import ( "github.com/pingcap/br/pkg/rtree" ) -// sortRanges checks if the range overlapped and sort them. -func sortRanges(ranges []rtree.Range, rewriteRules *RewriteRules) ([]rtree.Range, error) { +// SortRanges checks if the range overlapped and sort them. 
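In the import.go hunks above the exported sentinels are wrapped with errors.Annotatef, errors.Trace, or errors.AddStack before being returned, yet the backoffer still classifies them correctly because NextBackoff switches on errors.Cause. A small sketch of that interaction; only the annotation text is invented.

package main

import (
	"fmt"

	"github.com/pingcap/errors"

	"github.com/pingcap/br/pkg/restore"
)

func main() {
	// Annotatef keeps the sentinel as the cause, so a wrapped ErrGRPC is
	// still recognized as retryable by errors.Cause-based matching.
	wrapped := errors.Annotatef(restore.ErrGRPC, "store %d unreachable", 42)
	fmt.Println(errors.Cause(wrapped) == restore.ErrGRPC)         // true
	fmt.Println(errors.Cause(wrapped) == restore.ErrRangeIsEmpty) // false
}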
+func SortRanges(ranges []rtree.Range, rewriteRules *RewriteRules) ([]rtree.Range, error) { rangeTree := rtree.NewRangeTree() for _, rg := range ranges { if rewriteRules != nil { @@ -63,6 +65,14 @@ type RegionInfo struct { Leader *metapb.Peer } +// ContainsInterior returns whether the region contains the given key, and also +// that the key does not fall on the boundary (start key) of the region. +func (region *RegionInfo) ContainsInterior(key []byte) bool { + return bytes.Compare(key, region.Region.GetStartKey()) > 0 && + (len(region.Region.GetEndKey()) == 0 || + bytes.Compare(key, region.Region.GetEndKey()) < 0) +} + // RewriteRules contains rules for rewriting keys of tables. type RewriteRules struct { Table []*import_sstpb.RewriteRule diff --git a/pkg/restore/range_test.go b/pkg/restore/range_test.go index 37561f6b4..259bc5bf2 100644 --- a/pkg/restore/range_test.go +++ b/pkg/restore/range_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package restore_test import ( "bytes" @@ -9,6 +9,7 @@ import ( "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/tidb/tablecodec" + "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/rtree" ) @@ -44,7 +45,7 @@ func (s *testRangeSuite) TestSortRange(c *C) { {OldKeyPrefix: tablecodec.GenTableRecordPrefix(1), NewKeyPrefix: tablecodec.GenTableRecordPrefix(4)}, {OldKeyPrefix: tablecodec.GenTableRecordPrefix(2), NewKeyPrefix: tablecodec.GenTableRecordPrefix(5)}, } - rewriteRules := &RewriteRules{ + rewriteRules := &restore.RewriteRules{ Table: make([]*import_sstpb.RewriteRule, 0), Data: dataRules, } @@ -52,7 +53,7 @@ func (s *testRangeSuite) TestSortRange(c *C) { {StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), EndKey: append(tablecodec.GenTableRecordPrefix(1), []byte("bbb")...), Files: nil}, } - rs1, err := sortRanges(ranges1, rewriteRules) + rs1, err := restore.SortRanges(ranges1, rewriteRules) c.Assert(err, IsNil, Commentf("sort range1 failed: %v", err)) c.Assert(rs1, RangeEquals, []rtree.Range{ {StartKey: append(tablecodec.GenTableRecordPrefix(4), []byte("aaa")...), @@ -63,12 +64,12 @@ func (s *testRangeSuite) TestSortRange(c *C) { {StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), EndKey: append(tablecodec.GenTableRecordPrefix(2), []byte("bbb")...), Files: nil}, } - _, err = sortRanges(ranges2, rewriteRules) + _, err = restore.SortRanges(ranges2, rewriteRules) c.Assert(err, ErrorMatches, ".*table id does not match.*") ranges3 := initRanges() rewriteRules1 := initRewriteRules() - rs3, err := sortRanges(ranges3, rewriteRules1) + rs3, err := restore.SortRanges(ranges3, rewriteRules1) c.Assert(err, IsNil, Commentf("sort range1 failed: %v", err)) c.Assert(rs3, RangeEquals, []rtree.Range{ {StartKey: []byte("bbd"), EndKey: []byte("bbf"), Files: nil}, diff --git a/pkg/restore/split.go b/pkg/restore/split.go index ef685dc91..21fc5b58a 100644 --- a/pkg/restore/split.go +++ b/pkg/restore/split.go @@ -70,7 +70,7 @@ func (rs *RegionSplitter) Split( } startTime := time.Now() // Sort the range for getting the min and max key of the ranges - sortedRanges, errSplit := sortRanges(ranges, rewriteRules) + sortedRanges, errSplit := SortRanges(ranges, rewriteRules) if errSplit != nil { return errors.Trace(errSplit) } @@ -96,7 +96,7 @@ func (rs *RegionSplitter) Split( scatterRegions := make([]*RegionInfo, 0) SplitRegions: for i := 0; i < SplitRetryTimes; i++ { - regions, errScan := paginateScanRegion(ctx, rs.client, minKey, maxKey, 
scanRegionPaginationLimit) + regions, errScan := PaginateScanRegion(ctx, rs.client, minKey, maxKey, scanRegionPaginationLimit) if errScan != nil { return errors.Trace(errScan) } @@ -272,7 +272,7 @@ func getSplitKeys(rewriteRules *RewriteRules, ranges []rtree.Range, regions []*R checkKeys = append(checkKeys, truncateRowKey(rg.EndKey)) } for _, key := range checkKeys { - if region := needSplit(key, regions); region != nil { + if region := NeedSplit(key, regions); region != nil { splitKeys, ok := splitKeyMap[region.Region.GetId()] if !ok { splitKeys = make([][]byte, 0, 1) @@ -287,8 +287,8 @@ func getSplitKeys(rewriteRules *RewriteRules, ranges []rtree.Range, regions []*R return splitKeyMap } -// needSplit checks whether a key is necessary to split, if true returns the split region. -func needSplit(splitKey []byte, regions []*RegionInfo) *RegionInfo { +// NeedSplit checks whether a key is necessary to split, if true returns the split region. +func NeedSplit(splitKey []byte, regions []*RegionInfo) *RegionInfo { // If splitKey is the max key. if len(splitKey) == 0 { return nil @@ -300,7 +300,7 @@ func needSplit(splitKey []byte, regions []*RegionInfo) *RegionInfo { return nil } // If splitKey is in a region - if bytes.Compare(splitKey, region.Region.GetStartKey()) > 0 && beforeEnd(splitKey, region.Region.GetEndKey()) { + if region.ContainsInterior(splitKey) { return region } } @@ -322,10 +322,6 @@ func truncateRowKey(key []byte) []byte { return key } -func beforeEnd(key []byte, end []byte) bool { - return bytes.Compare(key, end) < 0 || len(end) == 0 -} - func replacePrefix(s []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { // We should search the dataRules firstly. for _, rule := range rewriteRules.Data { diff --git a/pkg/restore/split_client.go b/pkg/restore/split_client.go index d9b5f8677..696945225 100644 --- a/pkg/restore/split_client.go +++ b/pkg/restore/split_client.go @@ -88,7 +88,6 @@ func (c *pdClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, } c.storeCache[storeID] = store return store, nil - } func (c *pdClient) GetRegion(ctx context.Context, key []byte) (*RegionInfo, error) { diff --git a/pkg/restore/split_test.go b/pkg/restore/split_test.go index b21cbf781..0e2c22fa5 100644 --- a/pkg/restore/split_test.go +++ b/pkg/restore/split_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
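The split.go hunk above replaces the unexported beforeEnd helper with the new RegionInfo.ContainsInterior method, so NeedSplit now asks the region itself whether a key falls strictly inside it. Its boundary behaviour in a tiny self-contained sketch; raw keys are used for brevity, while the real callers pass codec-encoded keys, which does not change the comparison.

package main

import (
	"fmt"

	"github.com/pingcap/kvproto/pkg/metapb"

	"github.com/pingcap/br/pkg/restore"
)

func main() {
	region := &restore.RegionInfo{
		Region: &metapb.Region{StartKey: []byte("b"), EndKey: []byte("d")},
	}
	fmt.Println(region.ContainsInterior([]byte("a"))) // false: before the region
	fmt.Println(region.ContainsInterior([]byte("b"))) // false: exactly on the start key
	fmt.Println(region.ContainsInterior([]byte("c"))) // true:  strictly inside
	fmt.Println(region.ContainsInterior([]byte("d"))) // false: at or beyond the end key
}

This matches the TestNeedSplit expectations later in this patch: start and end keys do not trigger a split, interior keys do.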
-package restore +package restore_test import ( "bytes" @@ -16,18 +16,23 @@ import ( "github.com/pingcap/pd/v4/server/schedule/placement" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/rtree" ) type testClient struct { mu sync.RWMutex stores map[uint64]*metapb.Store - regions map[uint64]*RegionInfo + regions map[uint64]*restore.RegionInfo regionsInfo *core.RegionsInfo // For now it's only used in ScanRegions nextRegionID uint64 } -func newTestClient(stores map[uint64]*metapb.Store, regions map[uint64]*RegionInfo, nextRegionID uint64) *testClient { +func newTestClient( + stores map[uint64]*metapb.Store, + regions map[uint64]*restore.RegionInfo, + nextRegionID uint64, +) *testClient { regionsInfo := core.NewRegionsInfo() for _, regionInfo := range regions { regionsInfo.AddRegion(core.NewRegionInfo(regionInfo.Region, regionInfo.Leader)) @@ -40,7 +45,7 @@ func newTestClient(stores map[uint64]*metapb.Store, regions map[uint64]*RegionIn } } -func (c *testClient) GetAllRegions() map[uint64]*RegionInfo { +func (c *testClient) GetAllRegions() map[uint64]*restore.RegionInfo { c.mu.RLock() defer c.mu.RUnlock() return c.regions @@ -56,7 +61,7 @@ func (c *testClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Stor return store, nil } -func (c *testClient) GetRegion(ctx context.Context, key []byte) (*RegionInfo, error) { +func (c *testClient) GetRegion(ctx context.Context, key []byte) (*restore.RegionInfo, error) { c.mu.RLock() defer c.mu.RUnlock() for _, region := range c.regions { @@ -68,7 +73,7 @@ func (c *testClient) GetRegion(ctx context.Context, key []byte) (*RegionInfo, er return nil, errors.Errorf("region not found: key=%s", string(key)) } -func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) { +func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*restore.RegionInfo, error) { c.mu.RLock() defer c.mu.RUnlock() region, ok := c.regions[regionID] @@ -78,10 +83,14 @@ func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*Regio return region, nil } -func (c *testClient) SplitRegion(ctx context.Context, regionInfo *RegionInfo, key []byte) (*RegionInfo, error) { +func (c *testClient) SplitRegion( + ctx context.Context, + regionInfo *restore.RegionInfo, + key []byte, +) (*restore.RegionInfo, error) { c.mu.Lock() defer c.mu.Unlock() - var target *RegionInfo + var target *restore.RegionInfo splitKey := codec.EncodeBytes([]byte{}, key) for _, region := range c.regions { if bytes.Compare(splitKey, region.Region.StartKey) >= 0 && @@ -92,7 +101,7 @@ func (c *testClient) SplitRegion(ctx context.Context, regionInfo *RegionInfo, ke if target == nil { return nil, errors.Errorf("region not found: key=%s", string(key)) } - newRegion := &RegionInfo{ + newRegion := &restore.RegionInfo{ Region: &metapb.Region{ Peers: target.Region.Peers, Id: c.nextRegionID, @@ -108,24 +117,23 @@ func (c *testClient) SplitRegion(ctx context.Context, regionInfo *RegionInfo, ke } func (c *testClient) BatchSplitRegions( - ctx context.Context, regionInfo *RegionInfo, keys [][]byte, -) ([]*RegionInfo, error) { + ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte, +) ([]*restore.RegionInfo, error) { c.mu.Lock() defer c.mu.Unlock() - newRegions := make([]*RegionInfo, 0) + newRegions := make([]*restore.RegionInfo, 0) for _, key := range keys { - var target *RegionInfo + var target *restore.RegionInfo splitKey := codec.EncodeBytes([]byte{}, key) for _, region := range c.regions { - 
if bytes.Compare(splitKey, region.Region.GetStartKey()) > 0 && - beforeEnd(splitKey, region.Region.GetEndKey()) { + if region.ContainsInterior(splitKey) { target = region } } if target == nil { continue } - newRegion := &RegionInfo{ + newRegion := &restore.RegionInfo{ Region: &metapb.Region{ Peers: target.Region.Peers, Id: c.nextRegionID, @@ -142,7 +150,7 @@ func (c *testClient) BatchSplitRegions( return newRegions, nil } -func (c *testClient) ScatterRegion(ctx context.Context, regionInfo *RegionInfo) error { +func (c *testClient) ScatterRegion(ctx context.Context, regionInfo *restore.RegionInfo) error { return nil } @@ -152,11 +160,11 @@ func (c *testClient) GetOperator(ctx context.Context, regionID uint64) (*pdpb.Ge }, nil } -func (c *testClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*RegionInfo, error) { +func (c *testClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*restore.RegionInfo, error) { infos := c.regionsInfo.ScanRange(key, endKey, limit) - regions := make([]*RegionInfo, 0, len(infos)) + regions := make([]*restore.RegionInfo, 0, len(infos)) for _, info := range infos { - regions = append(regions, &RegionInfo{ + regions = append(regions, &restore.RegionInfo{ Region: info.GetMeta(), Leader: info.GetLeader(), }) @@ -190,7 +198,7 @@ func (s *testRestoreUtilSuite) TestSplit(c *C) { client := initTestClient() ranges := initRanges() rewriteRules := initRewriteRules() - regionSplitter := NewRegionSplitter(client) + regionSplitter := restore.NewRegionSplitter(client) ctx := context.Background() err := regionSplitter.Split(ctx, ranges, rewriteRules, func(key [][]byte) {}) @@ -215,7 +223,7 @@ func initTestClient() *testClient { StoreId: 1, } keys := [6]string{"", "aay", "bba", "bbh", "cca", ""} - regions := make(map[uint64]*RegionInfo) + regions := make(map[uint64]*restore.RegionInfo) for i := uint64(1); i < 6; i++ { startKey := []byte(keys[i-1]) if len(startKey) != 0 { @@ -225,7 +233,7 @@ func initTestClient() *testClient { if len(endKey) != 0 { endKey = codec.EncodeBytes([]byte{}, endKey) } - regions[i] = &RegionInfo{ + regions[i] = &restore.RegionInfo{ Region: &metapb.Region{ Id: i, Peers: peers, @@ -263,7 +271,7 @@ func initRanges() []rtree.Range { return ranges[:] } -func initRewriteRules() *RewriteRules { +func initRewriteRules() *restore.RewriteRules { var rules [2]*import_sstpb.RewriteRule rules[0] = &import_sstpb.RewriteRule{ OldKeyPrefix: []byte("aa"), @@ -273,7 +281,7 @@ func initRewriteRules() *RewriteRules { OldKeyPrefix: []byte("cc"), NewKeyPrefix: []byte("bb"), } - return &RewriteRules{ + return &restore.RewriteRules{ Table: rules[:], Data: rules[:], } @@ -282,7 +290,7 @@ func initRewriteRules() *RewriteRules { // expected regions after split: // [, aay), [aay, bb), [bb, bba), [bba, bbf), [bbf, bbh), [bbh, bbj), // [bbj, cca), [cca, xx), [xx, xxe), [xxe, xxz), [xxz, ) -func validateRegions(regions map[uint64]*RegionInfo) bool { +func validateRegions(regions map[uint64]*restore.RegionInfo) bool { keys := [12]string{"", "aay", "bb", "bba", "bbf", "bbh", "bbj", "cca", "xx", "xxe", "xxz", ""} if len(regions) != 11 { return false @@ -309,7 +317,7 @@ FindRegion: } func (s *testRestoreUtilSuite) TestNeedSplit(c *C) { - regions := []*RegionInfo{ + regions := []*restore.RegionInfo{ { Region: &metapb.Region{ StartKey: codec.EncodeBytes([]byte{}, []byte("b")), @@ -318,15 +326,15 @@ func (s *testRestoreUtilSuite) TestNeedSplit(c *C) { }, } // Out of region - c.Assert(needSplit([]byte("a"), regions), IsNil) + 
c.Assert(restore.NeedSplit([]byte("a"), regions), IsNil) // Region start key - c.Assert(needSplit([]byte("b"), regions), IsNil) + c.Assert(restore.NeedSplit([]byte("b"), regions), IsNil) // In region - region := needSplit([]byte("c"), regions) + region := restore.NeedSplit([]byte("c"), regions) c.Assert(bytes.Compare(region.Region.GetStartKey(), codec.EncodeBytes([]byte{}, []byte("b"))), Equals, 0) c.Assert(bytes.Compare(region.Region.GetEndKey(), codec.EncodeBytes([]byte{}, []byte("d"))), Equals, 0) // Region end key - c.Assert(needSplit([]byte("d"), regions), IsNil) + c.Assert(restore.NeedSplit([]byte("d"), regions), IsNil) // Out of region - c.Assert(needSplit([]byte("e"), regions), IsNil) + c.Assert(restore.NeedSplit([]byte("e"), regions), IsNil) } diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 82903820e..3272263db 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -81,9 +81,9 @@ func GetRewriteRules( } } -// getSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn. +// GetSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn. // The range of the returned sst meta is [regionRule.NewKeyPrefix, append(regionRule.NewKeyPrefix, 0xff)]. -func getSSTMetaFromFile( +func GetSSTMetaFromFile( id []byte, file *backup.File, region *metapb.Region, @@ -261,7 +261,6 @@ func AttachFilesToRanges( rangeTree.Update(rg) } for _, f := range files { - rg := rangeTree.Find(&rtree.Range{ StartKey: f.GetStartKey(), EndKey: f.GetEndKey(), @@ -433,10 +432,10 @@ func encodeKeyPrefix(key []byte) []byte { return append(encodedPrefix[:len(encodedPrefix)-9], key[len(key)-ungroupedLen:]...) } -// paginateScanRegion scan regions with a limit pagination and +// PaginateScanRegion scan regions with a limit pagination and // return all regions at once. // It reduces max gRPC message size. -func paginateScanRegion( +func PaginateScanRegion( ctx context.Context, client SplitClient, startKey, endKey []byte, limit int, ) ([]*RegionInfo, error) { if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 { diff --git a/pkg/restore/util_test.go b/pkg/restore/util_test.go index ebc86d3b9..fdd455cb5 100644 --- a/pkg/restore/util_test.go +++ b/pkg/restore/util_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package restore_test import ( "context" @@ -12,6 +12,8 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" + + "github.com/pingcap/br/pkg/restore" ) var _ = Suite(&testRestoreUtilSuite{}) @@ -33,7 +35,7 @@ func (s *testRestoreUtilSuite) TestGetSSTMetaFromFile(c *C) { StartKey: []byte("t2abc"), EndKey: []byte("t3a"), } - sstMeta := getSSTMetaFromFile([]byte{}, file, region, rule) + sstMeta := restore.GetSSTMetaFromFile([]byte{}, file, region, rule) c.Assert(string(sstMeta.GetRange().GetStart()), Equals, "t2abc") c.Assert(string(sstMeta.GetRange().GetEnd()), Equals, "t2\xff") } @@ -65,7 +67,7 @@ func (s *testRestoreUtilSuite) TestMapTableToFiles(c *C) { } func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { - rules := &RewriteRules{ + rules := &restore.RewriteRules{ Table: []*import_sstpb.RewriteRule{&import_sstpb.RewriteRule{ OldKeyPrefix: []byte(tablecodec.EncodeTablePrefix(1)), NewKeyPrefix: []byte(tablecodec.EncodeTablePrefix(2)), @@ -73,7 +75,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { } // Empty start/end key is not allowed. 
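PaginateScanRegion, exported above, exists so that region scans are issued in bounded pages (keeping each gRPC message small) while callers still get the whole key range back at once. The loop below is only a schematic of that idea, written against the SplitClient.ScanRegions signature visible in this patch; it is not BR's actual implementation, which, as the hunk shows, also rejects startKey >= endKey up front.

package restoresketch

import (
	"context"

	"github.com/pingcap/br/pkg/restore"
)

// paginateSketch asks for at most `limit` regions per call and resumes from
// the end key of the last region returned, until a short batch (or a region
// reaching +inf) signals that the range is covered.
func paginateSketch(
	ctx context.Context,
	client restore.SplitClient,
	startKey, endKey []byte,
	limit int,
) ([]*restore.RegionInfo, error) {
	all := make([]*restore.RegionInfo, 0, limit)
	key := startKey
	for {
		batch, err := client.ScanRegions(ctx, key, endKey, limit)
		if err != nil {
			return nil, err
		}
		all = append(all, batch...)
		if len(batch) < limit {
			return all, nil
		}
		key = batch[len(batch)-1].Region.GetEndKey()
		if len(key) == 0 {
			// The last region extends to +inf; nothing is left to scan.
			return all, nil
		}
	}
}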
- _, err := ValidateFileRanges( + _, err := restore.ValidateFileRanges( []*backup.File{&backup.File{ Name: "file_write.sst", StartKey: []byte(""), @@ -84,7 +86,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { c.Assert(err, ErrorMatches, ".*cannot find rewrite rule.*") // Range is not overlap, no rule found. - _, err = ValidateFileRanges( + _, err = restore.ValidateFileRanges( []*backup.File{{ Name: "file_write.sst", StartKey: tablecodec.EncodeTablePrefix(0), @@ -95,7 +97,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { c.Assert(err, ErrorMatches, ".*cannot find rewrite rule.*") // No rule for end key. - _, err = ValidateFileRanges( + _, err = restore.ValidateFileRanges( []*backup.File{{ Name: "file_write.sst", StartKey: tablecodec.EncodeTablePrefix(1), @@ -110,7 +112,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { OldKeyPrefix: tablecodec.EncodeTablePrefix(2), NewKeyPrefix: tablecodec.EncodeTablePrefix(3), }) - _, err = ValidateFileRanges( + _, err = restore.ValidateFileRanges( []*backup.File{{ Name: "file_write.sst", StartKey: tablecodec.EncodeTablePrefix(1), @@ -125,7 +127,7 @@ func (s *testRestoreUtilSuite) TestValidateFileRanges(c *C) { OldKeyPrefix: tablecodec.EncodeTablePrefix(2), NewKeyPrefix: tablecodec.EncodeTablePrefix(1), }) - _, err = ValidateFileRanges( + _, err = restore.ValidateFileRanges( []*backup.File{{ Name: "file_write.sst", StartKey: tablecodec.EncodeTablePrefix(1), @@ -147,12 +149,12 @@ func (s *testRestoreUtilSuite) TestPaginateScanRegion(c *C) { Id: 1, } - makeRegions := func(num uint64) (map[uint64]*RegionInfo, []*RegionInfo) { - regionsMap := make(map[uint64]*RegionInfo, num) - regions := make([]*RegionInfo, 0, num) + makeRegions := func(num uint64) (map[uint64]*restore.RegionInfo, []*restore.RegionInfo) { + regionsMap := make(map[uint64]*restore.RegionInfo, num) + regions := make([]*restore.RegionInfo, 0, num) endKey := make([]byte, 8) for i := uint64(0); i < num-1; i++ { - ri := &RegionInfo{ + ri := &restore.RegionInfo{ Region: &metapb.Region{ Id: i + 1, Peers: peers, @@ -177,7 +179,7 @@ func (s *testRestoreUtilSuite) TestPaginateScanRegion(c *C) { } else { endKey = codec.EncodeBytes([]byte{}, endKey) } - ri := &RegionInfo{ + ri := &restore.RegionInfo{ Region: &metapb.Region{ Id: num, Peers: peers, @@ -192,48 +194,48 @@ func (s *testRestoreUtilSuite) TestPaginateScanRegion(c *C) { } ctx := context.Background() - regionMap := make(map[uint64]*RegionInfo) - regions := []*RegionInfo{} - batch, err := paginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) + regionMap := make(map[uint64]*restore.RegionInfo) + regions := []*restore.RegionInfo{} + batch, err := restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions) regionMap, regions = makeRegions(1) - batch, err = paginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) + batch, err = restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions) regionMap, regions = makeRegions(2) - batch, err = paginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) + batch, err = restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions) regionMap, regions = makeRegions(3) - batch, err = paginateScanRegion(ctx, 
newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) + batch, err = restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions) regionMap, regions = makeRegions(8) - batch, err = paginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) + batch, err = restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions) regionMap, regions = makeRegions(8) - batch, err = paginateScanRegion( + batch, err = restore.PaginateScanRegion( ctx, newTestClient(stores, regionMap, 0), regions[1].Region.StartKey, []byte{}, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions[1:]) - batch, err = paginateScanRegion( + batch, err = restore.PaginateScanRegion( ctx, newTestClient(stores, regionMap, 0), []byte{}, regions[6].Region.EndKey, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions[:7]) - batch, err = paginateScanRegion( + batch, err = restore.PaginateScanRegion( ctx, newTestClient(stores, regionMap, 0), regions[1].Region.StartKey, regions[1].Region.EndKey, 3) c.Assert(err, IsNil) c.Assert(batch, DeepEquals, regions[1:2]) - _, err = paginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{2}, []byte{1}, 3) + _, err = restore.PaginateScanRegion(ctx, newTestClient(stores, regionMap, 0), []byte{2}, []byte{1}, 3) c.Assert(err, ErrorMatches, "startKey >= endKey.*") } diff --git a/pkg/rtree/rtree_test.go b/pkg/rtree/rtree_test.go index d3e151e25..720b1ca28 100644 --- a/pkg/rtree/rtree_test.go +++ b/pkg/rtree/rtree_test.go @@ -1,37 +1,43 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package rtree +package rtree_test import ( "fmt" "testing" . 
"github.com/pingcap/check" + + "github.com/pingcap/br/pkg/rtree" ) +func Test(t *testing.T) { + TestingT(t) +} + var _ = Suite(&testRangeTreeSuite{}) type testRangeTreeSuite struct{} -func newRange(start, end []byte) *Range { - return &Range{ +func newRange(start, end []byte) *rtree.Range { + return &rtree.Range{ StartKey: start, EndKey: end, } } func (s *testRangeTreeSuite) TestRangeTree(c *C) { - rangeTree := NewRangeTree() + rangeTree := rtree.NewRangeTree() c.Assert(rangeTree.Get(newRange([]byte(""), []byte(""))), IsNil) - search := func(key []byte) *Range { + search := func(key []byte) *rtree.Range { rg := rangeTree.Get(newRange(key, []byte(""))) if rg == nil { return nil } - return rg.(*Range) + return rg.(*rtree.Range) } - assertIncomplete := func(startKey, endKey []byte, ranges []Range) { + assertIncomplete := func(startKey, endKey []byte, ranges []rtree.Range) { incomplete := rangeTree.GetIncompleteRange(startKey, endKey) c.Logf("%#v %#v\n%#v\n%#v\n", startKey, endKey, incomplete, ranges) c.Assert(len(incomplete), Equals, len(ranges)) @@ -45,7 +51,7 @@ func (s *testRangeTreeSuite) TestRangeTree(c *C) { for e := s + 1; e < 0xff; e++ { start := []byte{byte(s)} end := []byte{byte(e)} - assertIncomplete(start, end, []Range{}) + assertIncomplete(start, end, []rtree.Range{}) } } } @@ -58,23 +64,23 @@ func (s *testRangeTreeSuite) TestRangeTree(c *C) { rangeTree.Update(*rangeA) c.Assert(rangeTree.Len(), Equals, 1) - assertIncomplete([]byte("a"), []byte("b"), []Range{}) + assertIncomplete([]byte("a"), []byte("b"), []rtree.Range{}) assertIncomplete([]byte(""), []byte(""), - []Range{ + []rtree.Range{ {StartKey: []byte(""), EndKey: []byte("a")}, {StartKey: []byte("b"), EndKey: []byte("")}, }) rangeTree.Update(*rangeC) c.Assert(rangeTree.Len(), Equals, 2) - assertIncomplete([]byte("a"), []byte("c"), []Range{ + assertIncomplete([]byte("a"), []byte("c"), []rtree.Range{ {StartKey: []byte("b"), EndKey: []byte("c")}, }) - assertIncomplete([]byte("b"), []byte("c"), []Range{ + assertIncomplete([]byte("b"), []byte("c"), []rtree.Range{ {StartKey: []byte("b"), EndKey: []byte("c")}, }) assertIncomplete([]byte(""), []byte(""), - []Range{ + []rtree.Range{ {StartKey: []byte(""), EndKey: []byte("a")}, {StartKey: []byte("b"), EndKey: []byte("c")}, {StartKey: []byte("d"), EndKey: []byte("")}, @@ -90,7 +96,7 @@ func (s *testRangeTreeSuite) TestRangeTree(c *C) { c.Assert(rangeTree.Len(), Equals, 3) c.Assert(search([]byte("b")), DeepEquals, rangeB) assertIncomplete([]byte(""), []byte(""), - []Range{ + []rtree.Range{ {StartKey: []byte(""), EndKey: []byte("a")}, {StartKey: []byte("d"), EndKey: []byte("")}, }) @@ -98,7 +104,7 @@ func (s *testRangeTreeSuite) TestRangeTree(c *C) { rangeTree.Update(*rangeD) c.Assert(rangeTree.Len(), Equals, 4) c.Assert(search([]byte("d")), DeepEquals, rangeD) - assertIncomplete([]byte(""), []byte(""), []Range{ + assertIncomplete([]byte(""), []byte(""), []rtree.Range{ {StartKey: []byte(""), EndKey: []byte("a")}, }) @@ -115,7 +121,7 @@ func (s *testRangeTreeSuite) TestRangeTree(c *C) { // Overwrite range BD, c-d should be empty rangeTree.Update(*rangeB) c.Assert(rangeTree.Len(), Equals, 4) - assertIncomplete([]byte(""), []byte(""), []Range{ + assertIncomplete([]byte(""), []byte(""), []rtree.Range{ {StartKey: []byte("c"), EndKey: []byte("d")}, }) @@ -169,9 +175,9 @@ func (s *testRangeTreeSuite) TestRangeIntersect(c *C) { } func BenchmarkRangeTreeUpdate(b *testing.B) { - rangeTree := NewRangeTree() + rangeTree := rtree.NewRangeTree() for i := 0; i < b.N; i++ { - item := Range{ + item := 
rtree.Range{ StartKey: []byte(fmt.Sprintf("%20d", i)), EndKey: []byte(fmt.Sprintf("%20d", i+1))} rangeTree.Update(item) diff --git a/pkg/storage/gcs.go b/pkg/storage/gcs.go index 4f9c02593..ccfaaf70b 100644 --- a/pkg/storage/gcs.go +++ b/pkg/storage/gcs.go @@ -32,7 +32,6 @@ type GCSBackendOptions struct { } func (options *GCSBackendOptions) apply(gcs *backup.GCS) error { - gcs.Endpoint = options.Endpoint gcs.StorageClass = options.StorageClass gcs.PredefinedAcl = options.PredefinedACL diff --git a/pkg/storage/gcs_test.go b/pkg/storage/gcs_test.go index 60a26f616..05217518a 100644 --- a/pkg/storage/gcs_test.go +++ b/pkg/storage/gcs_test.go @@ -21,7 +21,7 @@ func (r *testStorageSuite) TestGCS(c *C) { server, err := fakestorage.NewServerWithOptions(opts) c.Assert(err, IsNil) bucketName := "testbucket" - server.CreateBucket(bucketName) + server.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) gcs := &backup.GCS{ Bucket: bucketName, @@ -65,7 +65,7 @@ func (r *testStorageSuite) TestNewGCSStorage(c *C) { server, err := fakestorage.NewServerWithOptions(opts) c.Assert(err, IsNil) bucketName := "testbucket" - server.CreateBucket(bucketName) + server.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) { gcs := &backup.GCS{ diff --git a/pkg/storage/local.go b/pkg/storage/local.go index b1d20cedc..e49fe16ee 100644 --- a/pkg/storage/local.go +++ b/pkg/storage/local.go @@ -9,14 +9,15 @@ import ( "path" ) -// localStorage represents local file system storage +// localStorage represents local file system storage. type localStorage struct { base string } func (l *localStorage) Write(ctx context.Context, name string, data []byte) error { filepath := path.Join(l.base, name) - return ioutil.WriteFile(filepath, data, 0644) + return ioutil.WriteFile(filepath, data, 0644) // nolint:gosec + // the backupmeta file _is_ intended to be world-readable. } func (l *localStorage) Read(ctx context.Context, name string) ([]byte, error) { diff --git a/pkg/storage/parse_test.go b/pkg/storage/parse_test.go index 9dbd4b39c..290ada81d 100644 --- a/pkg/storage/parse_test.go +++ b/pkg/storage/parse_test.go @@ -143,5 +143,4 @@ func (r *testStorageSuite) TestFormatBackendURL(c *C) { }, }) c.Assert(url.String(), Equals, "gcs://bucket/some%20prefix/") - } diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index fef264082..d8d6e780f 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -33,7 +33,7 @@ const ( maxRetries = 3 ) -// s3Handlers make it easy to inject test functions +// s3Handlers make it easy to inject test functions. type s3Handlers interface { HeadObjectWithContext(context.Context, *s3.HeadObjectInput, ...request.Option) (*s3.HeadObjectOutput, error) GetObjectWithContext(context.Context, *s3.GetObjectInput, ...request.Option) (*s3.GetObjectOutput, error) @@ -42,14 +42,14 @@ type s3Handlers interface { WaitUntilObjectExistsWithContext(context.Context, *s3.HeadObjectInput, ...request.WaiterOption) error } -// S3Storage info for s3 storage +// S3Storage info for s3 storage. type S3Storage struct { session *session.Session svc s3Handlers options *backup.S3 } -// S3BackendOptions contains options for s3 storage +// S3BackendOptions contains options for s3 storage. type S3BackendOptions struct { Endpoint string `json:"endpoint" toml:"endpoint"` Region string `json:"region" toml:"region"` @@ -211,7 +211,7 @@ func newS3Storage( // revive:disable-line:flag-parameter }, nil } -// checkBucket checks if a bucket exists +// checkBucket checks if a bucket exists. 
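Going back to the rtree_test.go hunks above: the tests revolve around two operations of the now externally-consumed rtree package, Update, which inserts or overwrites a backed-up range, and GetIncompleteRange, which reports the gaps inside a queried span. A compact sketch of that behaviour with arbitrary keys:

package main

import (
	"fmt"

	"github.com/pingcap/br/pkg/rtree"
)

func main() {
	tree := rtree.NewRangeTree()
	tree.Update(rtree.Range{StartKey: []byte("a"), EndKey: []byte("b")})
	tree.Update(rtree.Range{StartKey: []byte("c"), EndKey: []byte("d")})

	// The span [b, c) is covered by neither stored range, so it is the only
	// incomplete piece of the queried interval [a, d).
	for _, rg := range tree.GetIncompleteRange([]byte("a"), []byte("d")) {
		fmt.Printf("missing %q .. %q\n", rg.StartKey, rg.EndKey)
	}
}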
var checkS3Bucket = func(svc *s3.S3, bucket string) error { input := &s3.HeadBucketInput{ Bucket: aws.String(bucket), diff --git a/pkg/storage/s3_test.go b/pkg/storage/s3_test.go index bd35b6faf..3e3d874fb 100644 --- a/pkg/storage/s3_test.go +++ b/pkg/storage/s3_test.go @@ -99,8 +99,8 @@ func (r *testStorageSuite) TestApplyUpdate(c *C) { s3 := u.GetS3() c.Assert(err, IsNil) c.Assert(s3, DeepEquals, test.s3) - } + tests := []testcase{ { name: "no region and no endpoint", diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index 39e5f2a0e..f93acac55 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -9,7 +9,7 @@ import ( "github.com/pingcap/kvproto/pkg/backup" ) -// ExternalStorage represents a kind of file system storage +// ExternalStorage represents a kind of file system storage. type ExternalStorage interface { // Write file to storage Write(ctx context.Context, name string, data []byte) error diff --git a/pkg/summary/collector.go b/pkg/summary/collector.go index 1c72ede4f..fbed44ba1 100644 --- a/pkg/summary/collector.go +++ b/pkg/summary/collector.go @@ -23,7 +23,7 @@ const ( TotalBytes = "total bytes" ) -// LogCollector collects infos into summary log +// LogCollector collects infos into summary log. type LogCollector interface { SetUnit(unit string) diff --git a/pkg/utils/key_test.go b/pkg/utils/key_test.go index 3e20bae24..211515e70 100644 --- a/pkg/utils/key_test.go +++ b/pkg/utils/key_test.go @@ -30,7 +30,6 @@ func (r *testKeySuite) TestParseKey(c *C) { _, err = ParseKey("notSupport", rawKey) c.Assert(err, ErrorMatches, "*unknown format*") - } func (r *testKeySuite) TestCompareEndKey(c *C) { diff --git a/pkg/utils/progress.go b/pkg/utils/progress.go index 3b688f598..fcabd53b5 100644 --- a/pkg/utils/progress.go +++ b/pkg/utils/progress.go @@ -12,7 +12,7 @@ import ( "go.uber.org/zap" ) -// ProgressPrinter prints a progress bar +// ProgressPrinter prints a progress bar. type ProgressPrinter struct { name string total int64 diff --git a/pkg/utils/retry.go b/pkg/utils/retry.go index 05dab6a5d..c1b8008e9 100644 --- a/pkg/utils/retry.go +++ b/pkg/utils/retry.go @@ -7,10 +7,10 @@ import ( "time" ) -// RetryableFunc presents a retryable opreation +// RetryableFunc presents a retryable operation. type RetryableFunc func() error -// Backoffer implements a backoff policy for retrying operations +// Backoffer implements a backoff policy for retrying operations. type Backoffer interface { // NextBackoff returns a duration to wait before retrying again NextBackoff(err error) time.Duration diff --git a/pkg/utils/unit.go b/pkg/utils/unit.go index 253d97eb6..7f1fc04e3 100644 --- a/pkg/utils/unit.go +++ b/pkg/utils/unit.go @@ -2,11 +2,15 @@ package utils -// unit of storage const ( + // B is number of bytes in one byte. B = uint64(1) << (iota * 10) + // KB is number of bytes in one kibibyte. KB + // MB is number of bytes in one mebibyte. MB + // GB is number of bytes in one gibibyte. GB + // TB is number of bytes in one tebibyte. TB ) diff --git a/pkg/utils/worker.go b/pkg/utils/worker.go index c6e9b6206..635748314 100644 --- a/pkg/utils/worker.go +++ b/pkg/utils/worker.go @@ -7,14 +7,14 @@ import ( "go.uber.org/zap" ) -// WorkerPool contains a pool of workers +// WorkerPool contains a pool of workers. type WorkerPool struct { limit uint workers chan *Worker name string } -// Worker identified by ID +// Worker identified by ID. 
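The unit.go hunk above attaches doc comments to the shifted-iota size constants; each name is 1024 times the previous one, starting from a single byte. Nothing below is new API, it only prints the exported values to make the progression concrete.

package main

import (
	"fmt"

	"github.com/pingcap/br/pkg/utils"
)

func main() {
	// B, KB, MB, GB, TB are uint64 values produced by 1 << (iota * 10).
	fmt.Println(utils.B, utils.KB, utils.MB)          // 1 1024 1048576
	fmt.Println(utils.GB == 1<<30, utils.TB == 1<<40) // true true
}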
type Worker struct { ID uint64 } diff --git a/tools/go.mod b/tools/go.mod index 489a62fa0..f81d65744 100644 --- a/tools/go.mod +++ b/tools/go.mod @@ -5,8 +5,9 @@ go 1.13 require ( github.com/dnephin/govet v0.0.0-20171012192244-4a96d43e39d3 github.com/go-playground/overalls v0.0.0-20191218162659-7df9f728c018 - github.com/golangci/golangci-lint v1.25.0 + github.com/golangci/golangci-lint v1.26.0 github.com/mgechev/revive v1.0.2 github.com/yookoala/realpath v1.0.0 // indirect golang.org/x/tools v0.0.0-20200422205258-72e4a01eba43 + gopkg.in/go-playground/assert.v1 v1.2.1 // indirect ) diff --git a/tools/go.sum b/tools/go.sum index ddca4c333..40223bd83 100644 --- a/tools/go.sum +++ b/tools/go.sum @@ -1,6 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Djarvur/go-err113 v0.0.0-20200410182137-af658d038157 h1:hY39LwQHh+1kaovmIjOrlqnXNX6tygSRfLkkK33IkZU= +github.com/Djarvur/go-err113 v0.0.0-20200410182137-af658d038157/go.mod h1:4UJr5HIiMZrwgkSPdsjy2uOQExX/WEILpIrO9UPGuXs= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OpenPeeDeeP/depguard v1.0.1 h1:VlW4R6jmBIv3/u1JNlawEvJMM4J+dPORPaZasQee8Us= github.com/OpenPeeDeeP/depguard v1.0.1/go.mod h1:xsIw86fROiiwelg+jB2uM9PiKihMMmUx/1V+TNhjQvM= @@ -101,8 +103,8 @@ github.com/golangci/gocyclo v0.0.0-20180528134321-2becd97e67ee h1:J2XAy40+7yz70u github.com/golangci/gocyclo v0.0.0-20180528134321-2becd97e67ee/go.mod h1:ozx7R9SIwqmqf5pRP90DhR2Oay2UIjGuKheCBCNwAYU= github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a h1:iR3fYXUjHCR97qWS8ch1y9zPNsgXThGwjKPrYfqMPks= github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a/go.mod h1:9qCChq59u/eW8im404Q2WWTrnBUQKjpNYKMbU4M7EFU= -github.com/golangci/golangci-lint v1.25.0 h1:fwVdXtCBBCmk9e/7bTjkeCMx52bhq1IqmEQOVDbHXcg= -github.com/golangci/golangci-lint v1.25.0/go.mod h1:BaJNZmLU6vdaTLEGJKTTL/05I3B2OfXaD9SrNVkwr7w= +github.com/golangci/golangci-lint v1.26.0 h1:CLLGRSA9BLMiNvsWPXHioYAdfIx9tkgdVWyA6bIdYCo= +github.com/golangci/golangci-lint v1.26.0/go.mod h1:tefbO6RcigFzvTnDC+Y51kntVGgkuCAVsC+mnfbPruc= github.com/golangci/ineffassign v0.0.0-20190609212857-42439a7714cc h1:gLLhTLMk2/SutryVJ6D4VZCU3CUqr8YloG7FPIBWFpI= github.com/golangci/ineffassign v0.0.0-20190609212857-42439a7714cc/go.mod h1:e5tpTHCfVze+7EpLEozzMB3eafxo2KT5veNg1k6byQU= github.com/golangci/lint-1 v0.0.0-20191013205115-297bf364a8e0 h1:MfyDlzVjl1hoaPzPD4Gpb/QgoRfSBR0jdhwGyAWwMSA= @@ -190,7 +192,7 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b/go.mod h1:r1VsdOzOPt1ZSrGZWFoNhsAedKnEd6r9Np1+5blZCWk= github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mozilla/tls-observatory v0.0.0-20190404164649-a3c1b6cfecfd/go.mod h1:SrKMQvPiws7F7iqYp8/TX+IhxCYhzr6N/1yb8cwHsGk= +github.com/mozilla/tls-observatory v0.0.0-20200220173314-aae45faa4006/go.mod h1:SrKMQvPiws7F7iqYp8/TX+IhxCYhzr6N/1yb8cwHsGk= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nakabonne/nestif v0.3.0 h1:+yOViDGhg8ygGrmII72nV9B/zGxY188TYpfolntsaPw= github.com/nakabonne/nestif v0.3.0/go.mod 
h1:dI314BppzXjJ4HsCnbo7XzrJHPszZsjnk5wEBSYHI2c= @@ -202,10 +204,11 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.11.0 h1:JAKSXpt1YjtLA7YpPiqO9ss6sNXEsPfSGdwN0UHqzrw= -github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.8.1 h1:C5Dqfs/LeauYDX0jJXIe2SWmwCbGzx9yF8C8xy3Lh34= -github.com/onsi/gomega v1.8.1/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= +github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU= +github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg= +github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= @@ -229,10 +232,10 @@ github.com/quasilyte/go-consistent v0.0.0-20190521200055-c6f3937de18c/go.mod h1: github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= -github.com/ryancurrah/gomodguard v1.0.2 h1:vumZpZardqQ9EfFIZDNEpKaMxfqqEBMhu0uSRcDO5x4= -github.com/ryancurrah/gomodguard v1.0.2/go.mod h1:9T/Cfuxs5StfsocWr4WzDL36HqnX0fVb9d5fSEaLhoE= -github.com/securego/gosec v0.0.0-20200103095621-79fbf3af8d83 h1:AtnWoOvTioyDXFvu96MWEeE8qj4COSQnJogzLy/u41A= -github.com/securego/gosec v0.0.0-20200103095621-79fbf3af8d83/go.mod h1:vvbZ2Ae7AzSq3/kywjUDxSNq2SJ27RxCz2un0H3ePqE= +github.com/ryancurrah/gomodguard v1.0.4 h1:oCreMAt9GuFXDe9jW4HBpc3GjdX3R/sUEcLAGh1zPx8= +github.com/ryancurrah/gomodguard v1.0.4/go.mod h1:9T/Cfuxs5StfsocWr4WzDL36HqnX0fVb9d5fSEaLhoE= +github.com/securego/gosec v0.0.0-20200316084457-7da9f46445fd h1:qB+l4fYZsH78xORC1aqVS0zNmgkQp4rkj2rvfxQMtzc= +github.com/securego/gosec v0.0.0-20200316084457-7da9f46445fd/go.mod h1:NurAFZsWJAEZjogSwdVPlHkOZB3DOAU7gsPP8VFZCHc= github.com/shirou/gopsutil v0.0.0-20190901111213-e4ec7b275ada/go.mod h1:WWnYX4lzhCH5h/3YBfyVA3VbLYjlMZZAQcW9ojMexNc= github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc= github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e h1:MZM7FHLqUHYI0Y/mQAt3d2aYa0SiNms/hFqC9qJYolM= @@ -275,8 +278,10 @@ github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= -github.com/tetafro/godot v0.2.5 h1:7+EYJM/Z4gYZhBFdRrVm6JTj5ZLw/QI1j4RfEOXJviE= -github.com/tetafro/godot v0.2.5/go.mod h1:pT6/T8+h6//L/LwQcFc4C0xpfy1euZwzS1sHdrFCms0= 
+github.com/tdakkota/asciicheck v0.0.0-20200416190851-d7f85be797a2 h1:Xr9gkxfOP0KQWXKNqmwe8vEeSUiUj4Rlee9CMVX2ZUQ= +github.com/tdakkota/asciicheck v0.0.0-20200416190851-d7f85be797a2/go.mod h1:yHp0ai0Z9gUljN3o0xMhYJnH/IcvkdTBOX2fmJ93JEM= +github.com/tetafro/godot v0.3.3 h1:uJjg8N+Ee10rAnaqJGet1WeI0YW4fiX9pKbwqnsqH6k= +github.com/tetafro/godot v0.3.3/go.mod h1:pT6/T8+h6//L/LwQcFc4C0xpfy1euZwzS1sHdrFCms0= github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e h1:RumXZ56IrCj4CL+g1b9OL/oH0QnsF976bC8xQFYUD5Q= github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= @@ -298,6 +303,7 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yookoala/realpath v1.0.0 h1:7OA9pj4FZd+oZDsyvXWQvjn5oBdcHRTV44PpdMSuImQ= github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE= +github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -310,6 +316,7 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= @@ -344,9 +351,10 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e h1:N7DeIrjYszNmSW409R3frPPwglRwMkXSBzwVbkOjLLA= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= @@ -368,8 +376,11 @@ golang.org/x/tools 
v0.0.0-20190719005602-e377ae9d6386/go.mod h1:jcCCGcm9btYwXyDq golang.org/x/tools v0.0.0-20190910044552-dd2b5c81c578/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200102140908-9497f49d5709/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200228224639-71482053b885/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200324003944-a576cf524670/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= +golang.org/x/tools v0.0.0-20200414032229-332987a829c3/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200422022333-3d57cf2e726e/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200422205258-72e4a01eba43 h1:Lcsc5ErIWemp8qAbYffG5vPrqjJ0zk82RTFGifeS1Pc= golang.org/x/tools v0.0.0-20200422205258-72e4a01eba43/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -391,6 +402,8 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/go-playground/assert.v1 v1.2.1 h1:xoYuJVE7KT85PYWrN730RguIQO0ePzVRfFMXadIrXTM= +gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= gopkg.in/ini.v1 v1.51.0 h1:AQvPpx3LzTDM0AjnIRlVFwFFGC+npRopjZxLJj6gdno= gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -400,7 +413,6 @@ gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bl gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 2bc34d9f9171dbdd98a14c53105ecee8b95fa255 Mon Sep 17 00:00:00 2001 From: Hillium Date: Wed, 13 May 2020 11:33:47 +0800 Subject: [PATCH 28/52] restore: fix test to match new change of master. 
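The package qualifier is needed presumably because util_test.go now sits in the external restore_test package (as batcher_test.go does) after the recent changes on master, so the exported helper can no longer be called unqualified. A minimal sketch under that assumption; groupByTable is a made-up name used only for illustration:

    package restore_test

    import (
        "github.com/pingcap/br/pkg/restore"
        "github.com/pingcap/kvproto/pkg/backup"
    )

    // groupByTable just forwards to the exported helper; from outside the
    // restore package the call has to be qualified as restore.MapTableToFiles.
    func groupByTable(files []*backup.File) map[int64][]*backup.File {
        return restore.MapTableToFiles(files)
    }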
--- pkg/restore/util_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/restore/util_test.go b/pkg/restore/util_test.go index fdd455cb5..54a501439 100644 --- a/pkg/restore/util_test.go +++ b/pkg/restore/util_test.go @@ -60,7 +60,7 @@ func (s *testRestoreUtilSuite) TestMapTableToFiles(c *C) { EndKey: tablecodec.EncodeTablePrefix(2)}, } - result := MapTableToFiles(append(filesOfTable2, filesOfTable1...)) + result := restore.MapTableToFiles(append(filesOfTable2, filesOfTable1...)) c.Assert(result[1], DeepEquals, filesOfTable1) c.Assert(result[2], DeepEquals, filesOfTable2) From b96de5b0e2fd5489689c2afe6138090fe6c8c87f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Sun, 17 May 2020 19:11:30 +0800 Subject: [PATCH 29/52] Apply suggestions from code review --- pkg/restore/client.go | 2 +- pkg/restore/pipeline_items.go | 2 +- pkg/task/restore.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 8518299b8..0eb6274fd 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -336,7 +336,7 @@ func (rc *Client) CreateTables( rewriteRules.Data = append(rewriteRules.Data, rules.Data...) newTables = append(newTables, et.Table) } - // Let's ensure that the original order. + // Let's ensure that it won't break the original order. sort.Slice(newTables, func(i, j int) bool { return tbMapping[newTables[i].Name.String()] < tbMapping[newTables[j].Name.String()] }) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 18cd9ccd3..ea5bc0ffa 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -84,7 +84,7 @@ func (b *Batcher) Len() int { // BatchSender is the abstract of how the batcher send a batch. type BatchSender interface { - // RestoreBatch will backup all ranges and tables + // RestoreBatch will send the restore request. RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs []CreatedTable) error Close() } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index c87e25d71..4f87c6d25 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -276,7 +276,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } -// dropToBlockhole drop all incoming tables into black hole. +// dropToBlackhole drop all incoming tables into black hole. func dropToBlackhole( ctx context.Context, tableStream <-chan restore.CreatedTable, From deb884880e6de969d67fae8ec82b5dbb15a3570b Mon Sep 17 00:00:00 2001 From: Hillium Date: Fri, 22 May 2020 14:37:44 +0800 Subject: [PATCH 30/52] restore: merge two progresses. --- pkg/restore/pipeline_items.go | 2 +- pkg/task/restore.go | 19 ++++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 18cd9ccd3..9ea08cc02 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -151,7 +151,7 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs } func (b *tikvSender) Close() { - b.updateCh.Close() + // don't close update channel here, since we may need it then. } // NewBatcher creates a new batcher by client and updateCh. 
diff --git a/pkg/task/restore.go b/pkg/task/restore.go index c87e25d71..246cbb835 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -203,14 +203,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf summary.CollectInt("restore ranges", rangeSize) log.Info("range and file prepared", zap.Int("file count", len(files)), zap.Int("range count", rangeSize)) - // Redirect to log if there is no log file to avoid unreadable output. - updateCh := g.StartProgress( - ctx, - cmdName, - // Split/Scatter + Download/Ingest - int64(restore.EstimateRangeSize(files)+len(files)), - !cfg.LogProgress) - clusterCfg, err := restorePreWork(ctx, client, mgr) if err != nil { return err @@ -238,6 +230,14 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Restore sst files in batch. batchSize := utils.MinInt(int(cfg.Concurrency), maxRestoreBatchSizeLimit) + // Redirect to log if there is no log file to avoid unreadable output. + updateCh := g.StartProgress( + ctx, + cmdName, + // Split/Scatter + Download/Ingest + Checksum + int64(restore.EstimateRangeSize(files)+len(files)+len(tables)), + !cfg.LogProgress) + defer updateCh.Close() sender, err := restore.NewTiKVSender(ctx, client, updateCh) if err != nil { return err @@ -250,9 +250,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf var finish <-chan struct{} // Checksum if cfg.Checksum { - updateCh = g.StartProgress( - ctx, "Checksum", int64(len(tables)), true) - defer updateCh.Close() finish = client.GoValidateChecksum( ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) } else { From 4060eec4326092654881e2336e1a9e3bb388a15e Mon Sep 17 00:00:00 2001 From: Hillium Date: Mon, 25 May 2020 12:42:11 +0800 Subject: [PATCH 31/52] restore: fix a bug. that is, when table is too big or batch size is too low, we will fail to restore the head part of this table. --- pkg/restore/batcher_test.go | 159 +++++++++++++++++++++++----------- pkg/restore/pipeline_items.go | 29 +++---- 2 files changed, 120 insertions(+), 68 deletions(-) diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index ff979cbef..9ab0254ad 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -3,16 +3,18 @@ package restore_test import ( + "bytes" "context" + "sync" "time" + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/br/pkg/restore" . "github.com/pingcap/check" "github.com/pingcap/errors" - "github.com/pingcap/log" "github.com/pingcap/parser/model" - "go.uber.org/zap" "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/br/pkg/utils" @@ -21,55 +23,51 @@ import ( type testBatcherSuite struct{} type drySender struct { - tbls chan restore.CreatedTable - ranges chan rtree.Range - nBatch int + mu *sync.Mutex + + rewriteRules *restore.RewriteRules + ranges []rtree.Range + nBatch int } func (d *drySender) RestoreBatch( _ctx context.Context, ranges []rtree.Range, - tbs []restore.CreatedTable, + rewriteRules *restore.RewriteRules, ) error { + d.mu.Lock() + defer d.mu.Unlock() d.nBatch++ - for _, tbl := range tbs { - log.Info("dry restore", zap.Int64("table ID", tbl.Table.ID)) - d.tbls <- tbl - } - for _, rng := range ranges { - d.ranges <- rng - } + d.rewriteRules.Append(*rewriteRules) + d.ranges = append(d.ranges, ranges...) 
return nil } -func (d *drySender) Close() { - close(d.tbls) - close(d.ranges) -} +func (d *drySender) Close() {} -func (d *drySender) exhaust() (tbls []restore.CreatedTable, rngs []rtree.Range) { - for tbl := range d.tbls { - tbls = append(tbls, tbl) - } - for rng := range d.ranges { - rngs = append(rngs, rng) - } - return +func (d *drySender) Ranges() []rtree.Range { + return d.ranges } func newDrySender() *drySender { return &drySender{ - tbls: make(chan restore.CreatedTable, 4096), - ranges: make(chan rtree.Range, 4096), + rewriteRules: restore.EmptyRewriteRule(), + ranges: []rtree.Range{}, + mu: new(sync.Mutex), } } -func (d *drySender) RangeLen() int { - return len(d.ranges) +func (d *drySender) HasRewriteRuleOfKey(prefix string) bool { + for _, rule := range d.rewriteRules.Table { + if bytes.Equal([]byte(prefix), rule.OldKeyPrefix) { + return true + } + } + return false } -func (d *drySender) TableLen() int { - return len(d.tbls) +func (d *drySender) RangeLen() int { + return len(d.ranges) } func (d *drySender) BatchCount() int { @@ -98,6 +96,17 @@ func fakeTableWithRange(id int64, rngs []rtree.Range) restore.TableWithRange { return tblWithRng } +func fakeRewriteRules(oldPrefix string, newPrefix string) *restore.RewriteRules { + return &restore.RewriteRules{ + Table: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: []byte(oldPrefix), + NewKeyPrefix: []byte(newPrefix), + }, + }, + } +} + func fakeRange(startKey, endKey string) rtree.Range { return rtree.Range{ StartKey: []byte(startKey), @@ -105,6 +114,13 @@ func fakeRange(startKey, endKey string) rtree.Range { } } +func join(nested [][]rtree.Range) (plain []rtree.Range) { + for _, ranges := range nested { + plain = append(plain, ranges...) + } + return plain +} + // TestBasic tests basic workflow of batcher. func (*testBatcherSuite) TestBasic(c *C) { errCh := make(chan error, 8) @@ -112,10 +128,15 @@ func (*testBatcherSuite) TestBasic(c *C) { batcher, _ := restore.NewBatcher(sender, errCh) batcher.SetThreshold(2) - simpleTables := []restore.TableWithRange{ - fakeTableWithRange(1, []rtree.Range{fakeRange("aaa", "aab")}), - fakeTableWithRange(2, []rtree.Range{fakeRange("baa", "bab"), fakeRange("bac", "bad")}), - fakeTableWithRange(3, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}), + tableRanges := [][]rtree.Range{ + {fakeRange("aaa", "aab")}, + {fakeRange("baa", "bab"), fakeRange("bac", "bad")}, + {fakeRange("caa", "cab"), fakeRange("cac", "cad")}, + } + + simpleTables := []restore.TableWithRange{} + for i, ranges := range tableRanges { + simpleTables = append(simpleTables, fakeTableWithRange(int64(i), ranges)) } for _, tbl := range simpleTables { @@ -123,16 +144,9 @@ func (*testBatcherSuite) TestBasic(c *C) { } batcher.Close(context.TODO()) - tbls, rngs := sender.exhaust() - totalRngs := []rtree.Range{} + rngs := sender.Ranges() - c.Assert(len(tbls), Equals, len(simpleTables)) - for i, tbl := range simpleTables { - c.Assert(tbls[i], DeepEquals, tbl.CreatedTable) - totalRngs = append(totalRngs, tbl.Range...) - } - - c.Assert(totalRngs, DeepEquals, rngs) + c.Assert(join(tableRanges), DeepEquals, rngs) select { case err := <-errCh: c.Fatal(errors.Trace(err)) @@ -153,18 +167,15 @@ func (*testBatcherSuite) TestAutoSend(c *C) { // enable auto commit. 
batcher.EnableAutoCommit(context.TODO(), 100*time.Millisecond) - time.Sleep(120 * time.Millisecond) + time.Sleep(200 * time.Millisecond) c.Assert(sender.RangeLen(), Greater, 0) - c.Assert(sender.TableLen(), Greater, 0) c.Assert(batcher.Len(), Equals, 0) batcher.Close(context.TODO()) - tbls, rngs := sender.exhaust() - c.Assert(len(tbls), Greater, 0) + rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) - c.Assert(tbls[0], DeepEquals, simpleTable.CreatedTable) select { case err := <-errCh: c.Fatal(errors.Trace(err)) @@ -189,10 +200,54 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { batcher.Close(context.TODO()) - tbls, rngs := sender.exhaust() - c.Assert(len(tbls), Greater, 0) + rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) - c.Assert(tbls[0], DeepEquals, simpleTable.CreatedTable) + select { + case err := <-errCh: + c.Fatal(errors.Trace(err)) + default: + } +} + +func (*testBatcherSuite) TestRewriteRules(c *C) { + tableRanges := [][]rtree.Range{ + {fakeRange("aaa", "aab")}, + {fakeRange("baa", "bab"), fakeRange("bac", "bad")}, + {fakeRange("caa", "cab"), fakeRange("cac", "cad"), + fakeRange("cae", "caf"), fakeRange("cag", "cai"), + fakeRange("caj", "cak"), fakeRange("cal", "cam"), + fakeRange("can", "cao"), fakeRange("cap", "caq")}, + } + rewriteRules := []*restore.RewriteRules{ + fakeRewriteRules("a", "ada"), + fakeRewriteRules("b", "bob"), + fakeRewriteRules("c", "cpp"), + } + + tables := make([]restore.TableWithRange, 0, len(tableRanges)) + for i, ranges := range tableRanges { + table := fakeTableWithRange(int64(i), ranges) + table.RewriteRule = rewriteRules[i] + tables = append(tables, table) + } + + ctx := context.TODO() + errCh := make(chan error, 8) + sender := newDrySender() + batcher, _ := restore.NewBatcher(sender, errCh) + batcher.SetThreshold(2) + + batcher.Add(ctx, tables[0]) + c.Assert(sender.RangeLen(), Equals, 0) + batcher.Add(ctx, tables[1]) + c.Assert(sender.HasRewriteRuleOfKey("a"), IsTrue) + c.Assert(sender.HasRewriteRuleOfKey("b"), IsTrue) + c.Assert(sender.RangeLen(), Equals, 2) + batcher.Add(ctx, tables[2]) + batcher.Close(ctx) + c.Assert(sender.HasRewriteRuleOfKey("c"), IsTrue) + c.Assert(sender.Ranges(), DeepEquals, join(tableRanges)) + select { case err := <-errCh: c.Fatal(errors.Trace(err)) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 10a626a32..9ffdafa0b 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -85,7 +85,7 @@ func (b *Batcher) Len() int { // BatchSender is the abstract of how the batcher send a batch. type BatchSender interface { // RestoreBatch will send the restore request. 
- RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs []CreatedTable) error + RestoreBatch(ctx context.Context, ranges []rtree.Range, rewriteRules *RewriteRules) error Close() } @@ -116,15 +116,7 @@ func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (Ba }, nil } -func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs []CreatedTable) error { - rewriteRules := EmptyRewriteRule() - tableNames := make([]string, 0, len(tbs)) - for _, t := range tbs { - rewriteRules.Append(*t.RewriteRule) - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Debug("split region by tables start", zap.Strings("tables", tableNames)) - +func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, rewriteRules *RewriteRules) error { if err := SplitRanges(ctx, b.client, ranges, rewriteRules, b.updateCh); err != nil { log.Error("failed on split range", zap.Any("ranges", ranges), @@ -132,7 +124,6 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, tbs ) return err } - log.Debug("split region by tables end", zap.Strings("tables", tableNames)) files := []*backup.File{} for _, fs := range ranges { @@ -233,13 +224,19 @@ func (b *Batcher) asyncSend(ctx context.Context) { } } -func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTable) { +func (b *Batcher) drainRanges() ( + ranges []rtree.Range, + emptyTables []CreatedTable, + rewriteRules *RewriteRules, +) { b.cachedTablesMu.Lock() + rewriteRules = EmptyRewriteRule() defer b.cachedTablesMu.Unlock() for offset, thisTable := range b.cachedTables { thisTableLen := len(thisTable.Range) collected := len(ranges) + rewriteRules.Append(*thisTable.RewriteRule) // the batch is full, we should stop here! // we use strictly greater than because when we send a batch at equal, the offset should plus one. @@ -259,7 +256,7 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl ranges = append(ranges, drained...) b.cachedTables = b.cachedTables[offset:] atomic.AddInt32(&b.size, -int32(len(drained))) - return ranges, emptyTables + return ranges, emptyTables, rewriteRules } emptyTables = append(emptyTables, thisTable.CreatedTable) @@ -278,13 +275,13 @@ func (b *Batcher) drainRanges() (ranges []rtree.Range, emptyTables []CreatedTabl // all tables are drained. b.cachedTables = []TableWithRange{} - return ranges, emptyTables + return ranges, emptyTables, rewriteRules } // Send sends all pending requests in the batcher. // returns tables sent in the current batch. 
func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { - ranges, tbs := b.drainRanges() + ranges, tbs, rewriteRules := b.drainRanges() tableNames := make([]string, 0, len(tbs)) for _, t := range tbs { tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) @@ -293,7 +290,7 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { zap.Strings("tables", tableNames), zap.Int("ranges", len(ranges)), ) - if err := b.sender.RestoreBatch(ctx, ranges, tbs); err != nil { + if err := b.sender.RestoreBatch(ctx, ranges, rewriteRules); err != nil { return nil, err } return tbs, nil From c1d2064dad110910d908e8341dc1f486f0b4074e Mon Sep 17 00:00:00 2001 From: Hillium Date: Mon, 25 May 2020 14:40:06 +0800 Subject: [PATCH 32/52] restore: extract batcher to another file --- pkg/restore/batcher.go | 244 ++++++++++++++++++++++++++++++++++ pkg/restore/pipeline_items.go | 215 +----------------------------- 2 files changed, 245 insertions(+), 214 deletions(-) create mode 100644 pkg/restore/batcher.go diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go new file mode 100644 index 000000000..fa8077e80 --- /dev/null +++ b/pkg/restore/batcher.go @@ -0,0 +1,244 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + +package restore + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/pingcap/log" + "go.uber.org/zap" + + "github.com/pingcap/br/pkg/rtree" +) + +// Batcher collects ranges to restore and send batching split/ingest request. +type Batcher struct { + cachedTables []TableWithRange + cachedTablesMu *sync.Mutex + rewriteRules *RewriteRules + + // joiner is for joining the background batch sender. + joiner chan<- struct{} + sendErr chan<- error + outCh chan<- CreatedTable + sender BatchSender + batchSizeThreshold int + size int32 +} + +// Len calculate the current size of this batcher. +func (b *Batcher) Len() int { + return int(atomic.LoadInt32(&b.size)) +} + +// NewBatcher creates a new batcher by client and updateCh. +// this batcher will work background, send batches per second, or batch size reaches limit. +// and it will emit full-restored tables to the output channel returned. +func NewBatcher( + sender BatchSender, + errCh chan<- error, +) (*Batcher, <-chan CreatedTable) { + output := make(chan CreatedTable, defaultBatcherOutputChannelSize) + b := &Batcher{ + rewriteRules: EmptyRewriteRule(), + sendErr: errCh, + outCh: output, + sender: sender, + cachedTablesMu: new(sync.Mutex), + batchSizeThreshold: 1, + } + return b, output +} + +// EnableAutoCommit enables the batcher commit batch periodicity even batcher size isn't big enough. +// we make this function for disable AutoCommit in some case. +func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { + if b.joiner != nil { + log.Warn("enable auto commit on a batcher that is enabled auto commit, nothing will happen") + log.Info("if desire, please disable auto commit firstly") + } + joiner := make(chan struct{}) + go b.workLoop(ctx, joiner, delay) + b.joiner = joiner +} + +// DisableAutoCommit blocks the current goroutine until the worker can gracefully stop, +// and then disable auto commit. +func (b *Batcher) DisableAutoCommit(ctx context.Context) { + b.joinWorker() + b.joiner = nil +} + +// joinWorker blocks the current goroutine until the worker can gracefully stop. +// return immediately when auto commit disabled. 
+func (b *Batcher) joinWorker() { + if b.joiner != nil { + log.Info("gracefully stoping worker goroutine") + b.joiner <- struct{}{} + log.Info("gracefully stopped worker goroutine") + } +} + +func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { + tick := time.NewTicker(delay) + defer tick.Stop() + for { + select { + case <-joiner: + log.Debug("graceful stop signal received") + return + case <-ctx.Done(): + b.sendErr <- ctx.Err() + return + case <-tick.C: + if b.Len() > 0 { + log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) + b.asyncSend(ctx) + } + } + } +} + +func (b *Batcher) asyncSend(ctx context.Context) { + tbls, err := b.Send(ctx) + if err != nil { + b.sendErr <- err + return + } + for _, t := range tbls { + b.outCh <- t + } +} + +type drainResult struct { + // TablesToSend are tables that would be send at this batch. + TablesToSend []CreatedTable + // BlankTablesAfterSend are tables that will be full-restored after this batch send. + BlankTablesAfterSend []CreatedTable + RewriteRules *RewriteRules + Ranges []rtree.Range +} + +func newDrainResult() drainResult { + return drainResult{ + TablesToSend: make([]CreatedTable, 0), + BlankTablesAfterSend: make([]CreatedTable, 0), + RewriteRules: EmptyRewriteRule(), + Ranges: make([]rtree.Range, 0), + } +} + +func (b *Batcher) drainRanges() drainResult { + result := newDrainResult() + + b.cachedTablesMu.Lock() + defer b.cachedTablesMu.Unlock() + + for offset, thisTable := range b.cachedTables { + thisTableLen := len(thisTable.Range) + collected := len(result.Ranges) + + result.RewriteRules.Append(*thisTable.RewriteRule) + result.TablesToSend = append(result.TablesToSend, thisTable.CreatedTable) + + // the batch is full, we should stop here! + // we use strictly greater than because when we send a batch at equal, the offset should plus one. + // (because the last table is sent, we should put it in emptyTables), and this will intrduce extra complex. + if thisTableLen+collected > b.batchSizeThreshold { + drainSize := b.batchSizeThreshold - collected + thisTableRanges := thisTable.Range + + var drained []rtree.Range + drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:] + log.Debug("draining partial table to batch", + zap.Stringer("table", thisTable.Table.Name), + zap.Stringer("database", thisTable.OldTable.Db.Name), + zap.Int("size", thisTableLen), + zap.Int("drained", drainSize), + ) + result.Ranges = append(result.Ranges, drained...) + b.cachedTables = b.cachedTables[offset:] + atomic.AddInt32(&b.size, -int32(len(drained))) + return result + } + + result.BlankTablesAfterSend = append(result.BlankTablesAfterSend, thisTable.CreatedTable) + // let's 'drain' the ranges of current table. This op must not make the batch full. + result.Ranges = append(result.Ranges, thisTable.Range...) + // let's reduce the batcher size each time, to make a consistence of batcher's size. + atomic.AddInt32(&b.size, -int32(len(thisTable.Range))) + // clear the table length. + b.cachedTables[offset].Range = []rtree.Range{} + log.Debug("draining table to batch", + zap.Stringer("table", thisTable.Table.Name), + zap.Stringer("database", thisTable.OldTable.Db.Name), + zap.Int("size", thisTableLen), + ) + } + + // all tables are drained. + b.cachedTables = []TableWithRange{} + return result +} + +// Send sends all pending requests in the batcher. +// returns tables sent FULLY in the current batch. 
+func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { + drainResult := b.drainRanges() + tbs := drainResult.TablesToSend + ranges := drainResult.Ranges + tableNames := make([]string, 0, len(tbs)) + for _, t := range tbs { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + log.Debug("do batch send", + zap.Strings("tables", tableNames), + zap.Int("ranges", len(ranges)), + ) + if err := b.sender.RestoreBatch(ctx, ranges, drainResult.RewriteRules); err != nil { + return nil, err + } + return drainResult.BlankTablesAfterSend, nil +} + +func (b *Batcher) sendIfFull(ctx context.Context) { + // never collect the send batch request message. + for b.Len() >= b.batchSizeThreshold { + log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) + b.asyncSend(ctx) + } +} + +// Add adds a task to the Batcher. +func (b *Batcher) Add(ctx context.Context, tbs TableWithRange) { + b.cachedTablesMu.Lock() + log.Debug("adding table to batch", + zap.Stringer("table", tbs.Table.Name), + zap.Stringer("database", tbs.OldTable.Db.Name), + zap.Int64("old id", tbs.OldTable.Info.ID), + zap.Int64("new id", tbs.Table.ID), + zap.Int("table size", len(tbs.Range)), + zap.Int("batch size", b.Len()), + ) + b.cachedTables = append(b.cachedTables, tbs) + b.rewriteRules.Append(*tbs.RewriteRule) + atomic.AddInt32(&b.size, int32(len(tbs.Range))) + b.cachedTablesMu.Unlock() + + b.sendIfFull(ctx) +} + +// Close closes the batcher, sending all pending requests, close updateCh. +func (b *Batcher) Close(ctx context.Context) { + log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) + for b.Len() > 0 { + b.asyncSend(ctx) + } + b.DisableAutoCommit(ctx) + close(b.outCh) + b.sender.Close() +} diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 9ffdafa0b..8c9373b2f 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -4,10 +4,6 @@ package restore import ( "context" - "fmt" - "sync" - "sync/atomic" - "time" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" @@ -40,21 +36,6 @@ type TableWithRange struct { Range []rtree.Range } -// Batcher collects ranges to restore and send batching split/ingest request. -type Batcher struct { - cachedTables []TableWithRange - cachedTablesMu *sync.Mutex - rewriteRules *RewriteRules - - // joiner is for joining the background batch sender. - joiner chan<- struct{} - sendErr chan<- error - outCh chan<- CreatedTable - sender BatchSender - batchSizeThreshold int - size int32 -} - // SetThreshold sets the threshold that how big the batch size reaching need to send batch. // note this function isn't goroutine safe yet, // just set threshold before anything starts(e.g. EnableAutoCommit), please. @@ -70,18 +51,13 @@ func Exhaust(ec <-chan error) []error { case err := <-ec: out = append(out, err) default: - // errCh will *never* closed(ya see, it has multi sender-part), + // errCh will NEVER be closed(ya see, it has multi sender-part), // so we just consume the current backlog of this cannel, then return. return out } } } -// Len calculate the current size of this batcher. -func (b *Batcher) Len() int { - return int(atomic.LoadInt32(&b.size)) -} - // BatchSender is the abstract of how the batcher send a batch. type BatchSender interface { // RestoreBatch will send the restore request. 
@@ -144,192 +120,3 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, rew func (b *tikvSender) Close() { // don't close update channel here, since we may need it then. } - -// NewBatcher creates a new batcher by client and updateCh. -// this batcher will work background, send batches per second, or batch size reaches limit. -// and it will emit full-restored tables to the output channel returned. -func NewBatcher( - sender BatchSender, - errCh chan<- error, -) (*Batcher, <-chan CreatedTable) { - output := make(chan CreatedTable, defaultBatcherOutputChannelSize) - b := &Batcher{ - rewriteRules: EmptyRewriteRule(), - sendErr: errCh, - outCh: output, - sender: sender, - cachedTablesMu: new(sync.Mutex), - batchSizeThreshold: 1, - } - return b, output -} - -// EnableAutoCommit enables the batcher commit batch periodicity even batcher size isn't big enough. -// we make this function for disable AutoCommit in some case. -func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { - if b.joiner != nil { - log.Warn("enable auto commit on a batcher that is enabled auto commit, nothing will happen") - log.Info("if desire, please disable auto commit firstly") - } - joiner := make(chan struct{}) - go b.workLoop(ctx, joiner, delay) - b.joiner = joiner -} - -// DisableAutoCommit blocks the current goroutine until the worker can gracefully stop, -// and then disable auto commit. -func (b *Batcher) DisableAutoCommit(ctx context.Context) { - b.joinWorker() - b.joiner = nil -} - -// joinWorker blocks the current goroutine until the worker can gracefully stop. -// return immediately when auto commit disabled. -func (b *Batcher) joinWorker() { - if b.joiner != nil { - log.Info("gracefully stoping worker goroutine") - b.joiner <- struct{}{} - log.Info("gracefully stopped worker goroutine") - } -} - -func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { - tick := time.NewTicker(delay) - defer tick.Stop() - for { - select { - case <-joiner: - log.Debug("graceful stop signal received") - return - case <-ctx.Done(): - b.sendErr <- ctx.Err() - return - case <-tick.C: - if b.Len() > 0 { - log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) - b.asyncSend(ctx) - } - } - } -} - -func (b *Batcher) asyncSend(ctx context.Context) { - tbls, err := b.Send(ctx) - if err != nil { - b.sendErr <- err - return - } - for _, t := range tbls { - b.outCh <- t - } -} - -func (b *Batcher) drainRanges() ( - ranges []rtree.Range, - emptyTables []CreatedTable, - rewriteRules *RewriteRules, -) { - b.cachedTablesMu.Lock() - rewriteRules = EmptyRewriteRule() - defer b.cachedTablesMu.Unlock() - - for offset, thisTable := range b.cachedTables { - thisTableLen := len(thisTable.Range) - collected := len(ranges) - rewriteRules.Append(*thisTable.RewriteRule) - - // the batch is full, we should stop here! - // we use strictly greater than because when we send a batch at equal, the offset should plus one. - // (because the last table is sent, we should put it in emptyTables), and this will intrduce extra complex. 
- if thisTableLen+collected > b.batchSizeThreshold { - drainSize := b.batchSizeThreshold - collected - thisTableRanges := thisTable.Range - - var drained []rtree.Range - drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:] - log.Debug("draining partial table to batch", - zap.Stringer("table", thisTable.Table.Name), - zap.Stringer("database", thisTable.OldTable.Db.Name), - zap.Int("size", thisTableLen), - zap.Int("drained", drainSize), - ) - ranges = append(ranges, drained...) - b.cachedTables = b.cachedTables[offset:] - atomic.AddInt32(&b.size, -int32(len(drained))) - return ranges, emptyTables, rewriteRules - } - - emptyTables = append(emptyTables, thisTable.CreatedTable) - // let's 'drain' the ranges of current table. This op must not make the batch full. - ranges = append(ranges, thisTable.Range...) - // let's reduce the batcher size each time, to make a consitance view of - atomic.AddInt32(&b.size, -int32(len(thisTable.Range))) - // clear the table length. - b.cachedTables[offset].Range = []rtree.Range{} - log.Debug("draining table to batch", - zap.Stringer("table", thisTable.Table.Name), - zap.Stringer("database", thisTable.OldTable.Db.Name), - zap.Int("size", thisTableLen), - ) - } - - // all tables are drained. - b.cachedTables = []TableWithRange{} - return ranges, emptyTables, rewriteRules -} - -// Send sends all pending requests in the batcher. -// returns tables sent in the current batch. -func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { - ranges, tbs, rewriteRules := b.drainRanges() - tableNames := make([]string, 0, len(tbs)) - for _, t := range tbs { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - log.Debug("do batch send", - zap.Strings("tables", tableNames), - zap.Int("ranges", len(ranges)), - ) - if err := b.sender.RestoreBatch(ctx, ranges, rewriteRules); err != nil { - return nil, err - } - return tbs, nil -} - -func (b *Batcher) sendIfFull(ctx context.Context) { - // never collect the send batch request message. - for b.Len() >= b.batchSizeThreshold { - log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend(ctx) - } -} - -// Add adds a task to the Batcher. -func (b *Batcher) Add(ctx context.Context, tbs TableWithRange) { - b.cachedTablesMu.Lock() - log.Debug("adding table to batch", - zap.Stringer("table", tbs.Table.Name), - zap.Stringer("database", tbs.OldTable.Db.Name), - zap.Int64("old id", tbs.OldTable.Info.ID), - zap.Int64("new id", tbs.Table.ID), - zap.Int("table size", len(tbs.Range)), - zap.Int("batch size", b.Len()), - ) - b.cachedTables = append(b.cachedTables, tbs) - b.rewriteRules.Append(*tbs.RewriteRule) - atomic.AddInt32(&b.size, int32(len(tbs.Range))) - b.cachedTablesMu.Unlock() - - b.sendIfFull(ctx) -} - -// Close closes the batcher, sending all pending requests, close updateCh. -func (b *Batcher) Close(ctx context.Context) { - log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) - for b.Len() > 0 { - b.asyncSend(ctx) - } - b.DisableAutoCommit(ctx) - close(b.outCh) - b.sender.Close() -} From b0dd3550d809955808a0794ea99ae38d3d0ed40c Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 29 May 2020 11:17:50 +0800 Subject: [PATCH 33/52] task: don't return imediately when files is empty. 
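Even with zero data files the restore pipeline (table creation, the batcher, and the checksum or blackhole collector) has already been wired up, so returning early would leave it undrained and its channels unclosed. The sketch below is a hypothetical, heavily simplified outline of the intended control flow, not the real RunRestore body; runRestoreSketch and its parameters are made-up names for illustration only:

    package task

    // runRestoreSketch shows the shape of the fix: when the backup holds no
    // data files we record success, but we no longer return early; we still
    // wait for the already-started pipeline to drain and finish.
    func runRestoreSketch(fileCount int, errCh <-chan error, finish <-chan struct{}) error {
        if fileCount == 0 {
            // summary.SetSuccessStatus(true) in the real code, then fall
            // through instead of `return nil`.
        }
        select {
        case err := <-errCh:
            // some stage of the pipeline failed.
            return err
        case <-finish:
            // every created table has flowed through the whole pipeline.
            return nil
        }
    }

The real change only drops the early return and lets the existing select on errCh/finish at the end of RunRestore do the waiting.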
--- pkg/restore/batcher.go | 6 +-- pkg/task/restore.go | 94 +++++++++++++++++++++--------------------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index fa8077e80..03e68e99f 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -77,9 +77,9 @@ func (b *Batcher) DisableAutoCommit(ctx context.Context) { // return immediately when auto commit disabled. func (b *Batcher) joinWorker() { if b.joiner != nil { - log.Info("gracefully stoping worker goroutine") + log.Debug("gracefully stoping worker goroutine") b.joiner <- struct{}{} - log.Info("gracefully stopped worker goroutine") + log.Debug("gracefully stopped worker goroutine") } } @@ -169,7 +169,6 @@ func (b *Batcher) drainRanges() drainResult { result.BlankTablesAfterSend = append(result.BlankTablesAfterSend, thisTable.CreatedTable) // let's 'drain' the ranges of current table. This op must not make the batch full. result.Ranges = append(result.Ranges, thisTable.Range...) - // let's reduce the batcher size each time, to make a consistence of batcher's size. atomic.AddInt32(&b.size, -int32(len(thisTable.Range))) // clear the table length. b.cachedTables[offset].Range = []rtree.Range{} @@ -206,7 +205,6 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { } func (b *Batcher) sendIfFull(ctx context.Context) { - // never collect the send batch request message. for b.Len() >= b.batchSizeThreshold { log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) b.asyncSend(ctx) diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 98b5eda0f..e4b2df0dc 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -193,7 +193,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if len(files) == 0 { log.Info("no files, empty databases and tables are restored") summary.SetSuccessStatus(true) - return nil + // don't return immediately, wait all pipeline done. } placementRules, err := client.GetPlacementRules(cfg.PD) @@ -256,7 +256,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf batcher, afterRestoreStream := restore.NewBatcher(sender, errCh) batcher.SetThreshold(batchSize) batcher.EnableAutoCommit(ctx, time.Second) - goRestore(ctx, rangeStream, placementRules, client, batcher, errCh) + go restoreTableStream(ctx, rangeStream, placementRules, client, batcher, errCh) var finish <-chan struct{} // Checksum @@ -589,8 +589,9 @@ func enableTiDBConfig() { config.StoreGlobalConfig(conf) } -// goRestore forks a goroutine to do the restore process. -func goRestore( +// restoreTableStream blocks current goroutine and restore a stream of tables, +// by send tables to batcher. +func restoreTableStream( ctx context.Context, inputCh <-chan restore.TableWithRange, rules []placement.Rule, @@ -598,54 +599,51 @@ func goRestore( batcher *restore.Batcher, errCh chan<- error, ) { - go func() { - // We cache old tables so that we can 'batch' recover TiFlash and tables. - oldTables := []*utils.Table{} - newTables := []*model.TableInfo{} - defer func() { - // when things done, we must clean pending requests. - batcher.Close(ctx) - log.Info("doing postwork", - zap.Int("new tables", len(newTables)), - zap.Int("old tables", len(oldTables)), - ) - splitPostWork(ctx, client, newTables) - if err := client.RecoverTiFlashReplica(oldTables); err != nil { - log.Error("failed on recover TiFlash replicas", zap.Error(err)) + // We cache old tables so that we can 'batch' recover TiFlash and tables. 
+ oldTables := []*utils.Table{} + newTables := []*model.TableInfo{} + defer func() { + // when things done, we must clean pending requests. + batcher.Close(ctx) + log.Info("doing postwork", + zap.Int("new tables", len(newTables)), + zap.Int("old tables", len(oldTables)), + ) + splitPostWork(ctx, client, newTables) + if err := client.RecoverTiFlashReplica(oldTables); err != nil { + log.Error("failed on recover TiFlash replicas", zap.Error(err)) + errCh <- err + } + }() + + for { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + return + case t, ok := <-inputCh: + if !ok { + return + } + tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) + if err != nil { + log.Error("failed on remove TiFlash replicas", zap.Error(err)) errCh <- err + return } - }() + t.OldTable.TiFlashReplicas = tiFlashRep + oldTables = append(oldTables, t.OldTable) - for { - select { - case <-ctx.Done(): - errCh <- ctx.Err() + // Reuse of splitPrepareWork would be safe. + // But this operation sometime would be costly. + if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { + log.Error("failed on set online restore placement rules", zap.Error(err)) + errCh <- err return - case t, ok := <-inputCh: - if !ok { - return - } - // Omit the number of TiFlash have been removed. - tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) - if err != nil { - log.Error("failed on remove TiFlash replicas", zap.Error(err)) - errCh <- err - return - } - t.OldTable.TiFlashReplicas = tiFlashRep - oldTables = append(oldTables, t.OldTable) - - // Reuse of splitPrepareWork would be safe. - // But this operation sometime would be costly. - if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { - log.Error("failed on set online restore placement rules", zap.Error(err)) - errCh <- err - return - } - newTables = append(newTables, t.Table) - - batcher.Add(ctx, t) } + newTables = append(newTables, t.Table) + + batcher.Add(ctx, t) } - }() + } } From b53526ee037ef53956df772da6200186a66f1e8a Mon Sep 17 00:00:00 2001 From: yujuncen Date: Tue, 2 Jun 2020 21:47:42 +0800 Subject: [PATCH 34/52] restore,task: do some refactor We move `splitPrepareWork` into a struct named `ContextManager`, so that we can batchly set placement rules on online restore. --- pkg/restore/batcher.go | 10 ++++ pkg/restore/batcher_test.go | 92 +++++++++++++++++++++++++++++------ pkg/restore/pipeline_items.go | 78 +++++++++++++++++++++++++++++ pkg/task/restore.go | 46 ++---------------- 4 files changed, 167 insertions(+), 59 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index 03e68e99f..4a19ebf0d 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -26,6 +26,7 @@ type Batcher struct { sendErr chan<- error outCh chan<- CreatedTable sender BatchSender + manager ContextManager batchSizeThreshold int size int32 } @@ -40,6 +41,7 @@ func (b *Batcher) Len() int { // and it will emit full-restored tables to the output channel returned. 
func NewBatcher( sender BatchSender, + manager ContextManager, errCh chan<- error, ) (*Batcher, <-chan CreatedTable) { output := make(chan CreatedTable, defaultBatcherOutputChannelSize) @@ -48,6 +50,7 @@ func NewBatcher( sendErr: errCh, outCh: output, sender: sender, + manager: manager, cachedTablesMu: new(sync.Mutex), batchSizeThreshold: 1, } @@ -198,9 +201,16 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { zap.Strings("tables", tableNames), zap.Int("ranges", len(ranges)), ) + + if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { + return nil, err + } if err := b.sender.RestoreBatch(ctx, ranges, drainResult.RewriteRules); err != nil { return nil, err } + if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { + return nil, err + } return drainResult.BlankTablesAfterSend, nil } diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 9ab0254ad..8d0d114b5 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -9,6 +9,8 @@ import ( "time" "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/log" + "go.uber.org/zap" "github.com/pingcap/br/pkg/restore" @@ -57,6 +59,51 @@ func newDrySender() *drySender { } } +type recordCurrentTableManager map[int64]bool + +func newMockManager() recordCurrentTableManager { + return make(recordCurrentTableManager) +} + +func (manager recordCurrentTableManager) Enter(_ context.Context, tables []restore.CreatedTable) error { + for _, t := range tables { + log.Info("entering", zap.Int64("table ID", t.Table.ID)) + manager[t.Table.ID] = true + } + return nil +} + +func (manager recordCurrentTableManager) Leave(_ context.Context, tables []restore.CreatedTable) error { + for _, t := range tables { + if !manager[t.Table.ID] { + return errors.Errorf("Table %d is removed before added", t.Table.ID) + } + log.Info("leaving", zap.Int64("table ID", t.Table.ID)) + manager[t.Table.ID] = false + } + return nil +} + +func (manager recordCurrentTableManager) Has(tables ...restore.TableWithRange) bool { + ids := make([]int64, 0, len(tables)) + currentIDs := make([]int64, 0, len(manager)) + for _, t := range tables { + ids = append(ids, t.Table.ID) + } + for id, contains := range manager { + if contains { + currentIDs = append(currentIDs, id) + } + } + log.Info("testing", zap.Int64s("should has ID", ids), zap.Int64s("has ID", currentIDs)) + for _, i := range ids { + if !manager[i] { + return false + } + } + return true +} + func (d *drySender) HasRewriteRuleOfKey(prefix string) bool { for _, rule := range d.rewriteRules.Table { if bytes.Equal([]byte(prefix), rule.OldKeyPrefix) { @@ -125,7 +172,8 @@ func join(nested [][]rtree.Range) (plain []rtree.Range) { func (*testBatcherSuite) TestBasic(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(sender, errCh) + manager := newMockManager() + batcher, _ := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(2) tableRanges := [][]rtree.Range{ @@ -138,12 +186,12 @@ func (*testBatcherSuite) TestBasic(c *C) { for i, ranges := range tableRanges { simpleTables = append(simpleTables, fakeTableWithRange(int64(i), ranges)) } - + ctx := context.Background() for _, tbl := range simpleTables { - batcher.Add(context.TODO(), tbl) + batcher.Add(ctx, tbl) } - batcher.Close(context.TODO()) + batcher.Close(ctx) rngs := sender.Ranges() c.Assert(join(tableRanges), DeepEquals, rngs) @@ -157,22 +205,24 @@ func (*testBatcherSuite) TestBasic(c *C) { func (*testBatcherSuite) 
TestAutoSend(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(sender, errCh) + manager := newMockManager() + batcher, _ := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(1024) simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) - batcher.Add(context.TODO(), simpleTable) + ctx := context.Background() + batcher.Add(ctx, simpleTable) c.Assert(batcher.Len(), Greater, 0) // enable auto commit. - batcher.EnableAutoCommit(context.TODO(), 100*time.Millisecond) + batcher.EnableAutoCommit(ctx, 100*time.Millisecond) time.Sleep(200 * time.Millisecond) c.Assert(sender.RangeLen(), Greater, 0) c.Assert(batcher.Len(), Equals, 0) - batcher.Close(context.TODO()) + batcher.Close(ctx) rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) @@ -186,7 +236,8 @@ func (*testBatcherSuite) TestAutoSend(c *C) { func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(sender, errCh) + manager := newMockManager() + batcher, _ := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(2) simpleTable := fakeTableWithRange(1, []rtree.Range{ @@ -195,10 +246,11 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - batcher.Add(context.TODO(), simpleTable) + ctx := context.TODO() + batcher.Add(ctx, simpleTable) c.Assert(sender.BatchCount(), Equals, 4) - batcher.Close(context.TODO()) + batcher.Close(ctx) rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) @@ -234,7 +286,8 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { ctx := context.TODO() errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(sender, errCh) + manager := newMockManager() + batcher, _ := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(2) batcher.Add(ctx, tables[0]) @@ -242,6 +295,7 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { batcher.Add(ctx, tables[1]) c.Assert(sender.HasRewriteRuleOfKey("a"), IsTrue) c.Assert(sender.HasRewriteRuleOfKey("b"), IsTrue) + c.Assert(manager.Has(tables[1]), IsTrue) c.Assert(sender.RangeLen(), Equals, 2) batcher.Add(ctx, tables[2]) batcher.Close(ctx) @@ -258,7 +312,8 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { func (*testBatcherSuite) TestBatcherLen(c *C) { errCh := make(chan error, 8) sender := newDrySender() - batcher, _ := restore.NewBatcher(sender, errCh) + manager := newMockManager() + batcher, _ := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(15) simpleTable := fakeTableWithRange(1, []rtree.Range{ @@ -273,11 +328,16 @@ func (*testBatcherSuite) TestBatcherLen(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - batcher.Add(context.TODO(), simpleTable) + ctx := context.TODO() + batcher.Add(ctx, simpleTable) c.Assert(batcher.Len(), Equals, 8) - batcher.Add(context.TODO(), simpleTable2) + c.Assert(manager.Has(simpleTable), IsFalse) + c.Assert(manager.Has(simpleTable2), IsFalse) + batcher.Add(ctx, simpleTable2) c.Assert(batcher.Len(), Equals, 1) - batcher.Close(context.TODO()) + c.Assert(manager.Has(simpleTable2), IsTrue) + c.Assert(manager.Has(simpleTable), IsFalse) + batcher.Close(ctx) c.Assert(batcher.Len(), Equals, 0) select { diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 8c9373b2f..3863a2305 
100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -21,6 +21,84 @@ const ( defaultBatcherOutputChannelSize = 1024 ) +// ContextManager is the struct to manage a TiKV 'context' for restore. +// Batcher will call Enter when any table should be restore on batch, +// so you can do some prepare work here(e.g. set placement rules for online restore). +type ContextManager interface { + // Enter make some tables 'enter' this context(a.k.a., prepare for restore). + Enter(ctx context.Context, tables []CreatedTable) error + // Leave make some tables 'leave' this context(a.k.a., restore is done, do some post-works). + Leave(ctx context.Context, tables []CreatedTable) error +} + +// NewBRContextManager makes a BR context manager, that is, +// set placement rules for online restore when enter(see ), +// unset them when leave. +func NewBRContextManager(client *Client) ContextManager { + return &brContextManager{ + client: client, + } +} + +type brContextManager struct { + client *Client + + // This 'set' of table ID allow us handle each table just once. + hasTable map[int64]bool +} + +func (manager *brContextManager) Enter(ctx context.Context, tables []CreatedTable) error { + placementRuleTables := make([]*model.TableInfo, 0, len(tables)) + + for _, tbl := range tables { + if manager.hasTable[tbl.Table.ID] { + placementRuleTables = append(placementRuleTables, tbl.Table) + } + manager.hasTable[tbl.Table.ID] = true + } + + return splitPrepareWork(ctx, manager.client, placementRuleTables) +} + +func (manager *brContextManager) Leave(ctx context.Context, tables []CreatedTable) error { + placementRuleTables := make([]*model.TableInfo, 0, len(tables)) + + for _, table := range tables { + placementRuleTables = append(placementRuleTables, table.Table) + } + + splitPostWork(ctx, manager.client, placementRuleTables) + return nil +} + +func splitPostWork(ctx context.Context, client *Client, tables []*model.TableInfo) { + err := client.ResetPlacementRules(ctx, tables) + if err != nil { + log.Warn("reset placement rules failed", zap.Error(err)) + return + } + + err = client.ResetRestoreLabels(ctx) + if err != nil { + log.Warn("reset store labels failed", zap.Error(err)) + } +} + +func splitPrepareWork(ctx context.Context, client *Client, tables []*model.TableInfo) error { + err := client.SetupPlacementRules(ctx, tables) + if err != nil { + log.Error("setup placement rules failed", zap.Error(err)) + return errors.Trace(err) + } + + err = client.WaitPlacementSchedule(ctx, tables) + if err != nil { + log.Error("wait placement schedule failed", zap.Error(err)) + return errors.Trace(err) + } + return nil +} + // CreatedTable is a table created on restore process, // but not yet filled with data. 
type CreatedTable struct { diff --git a/pkg/task/restore.go b/pkg/task/restore.go index a789fc55f..71e68bf6b 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -10,7 +10,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/log" - "github.com/pingcap/parser/model" "github.com/pingcap/pd/v4/server/schedule/placement" "github.com/pingcap/tidb-tools/pkg/filter" "github.com/pingcap/tidb/config" @@ -253,7 +252,8 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err != nil { return err } - batcher, afterRestoreStream := restore.NewBatcher(sender, errCh) + manager := restore.NewBRContextManager(client) + batcher, afterRestoreStream := restore.NewBatcher(sender, manager, errCh) batcher.SetThreshold(batchSize) batcher.EnableAutoCommit(ctx, time.Second) go restoreTableStream(ctx, rangeStream, placementRules, client, batcher, errCh) @@ -491,34 +491,6 @@ func addPDLeaderScheduler(ctx context.Context, mgr *conn.Mgr, removedSchedulers return nil } -func splitPrepareWork(ctx context.Context, client *restore.Client, tables []*model.TableInfo) error { - err := client.SetupPlacementRules(ctx, tables) - if err != nil { - log.Error("setup placement rules failed", zap.Error(err)) - return errors.Trace(err) - } - - err = client.WaitPlacementSchedule(ctx, tables) - if err != nil { - log.Error("wait placement schedule failed", zap.Error(err)) - return errors.Trace(err) - } - return nil -} - -func splitPostWork(ctx context.Context, client *restore.Client, tables []*model.TableInfo) { - err := client.ResetPlacementRules(ctx, tables) - if err != nil { - log.Warn("reset placement rules failed", zap.Error(err)) - return - } - - err = client.ResetRestoreLabels(ctx) - if err != nil { - log.Warn("reset store labels failed", zap.Error(err)) - } -} - // RunRestoreTiflashReplica restores the replica of tiflash saved in the last restore. func RunRestoreTiflashReplica(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConfig) error { defer summary.Summary(cmdName) @@ -601,15 +573,12 @@ func restoreTableStream( ) { // We cache old tables so that we can 'batch' recover TiFlash and tables. oldTables := []*utils.Table{} - newTables := []*model.TableInfo{} defer func() { // when things done, we must clean pending requests. batcher.Close(ctx) log.Info("doing postwork", - zap.Int("new tables", len(newTables)), - zap.Int("old tables", len(oldTables)), + zap.Int("table count", len(oldTables)), ) - splitPostWork(ctx, client, newTables) if err := client.RecoverTiFlashReplica(oldTables); err != nil { log.Error("failed on recover TiFlash replicas", zap.Error(err)) errCh <- err @@ -634,15 +603,6 @@ func restoreTableStream( t.OldTable.TiFlashReplicas = tiFlashRep oldTables = append(oldTables, t.OldTable) - // Reuse of splitPrepareWork would be safe. - // But this operation sometime would be costly. - if err := splitPrepareWork(ctx, client, []*model.TableInfo{t.Table}); err != nil { - log.Error("failed on set online restore placement rules", zap.Error(err)) - errCh <- err - return - } - newTables = append(newTables, t.Table) - batcher.Add(ctx, t) } } From dd6af602cfdb592fd7adc6197959e6ca383fefbb Mon Sep 17 00:00:00 2001 From: yujuncen Date: Tue, 2 Jun 2020 22:02:18 +0800 Subject: [PATCH 35/52] restore: fix a shaming bug... 
:| --- pkg/restore/pipeline_items.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 3863a2305..233864f2d 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -37,6 +37,8 @@ type ContextManager interface { func NewBRContextManager(client *Client) ContextManager { return &brContextManager{ client: client, + + hasTable: make(map[int64]bool), } } From 3303cefde2a5ce2c8e918b52a02e05761633983b Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 5 Jun 2020 12:44:30 +0800 Subject: [PATCH 36/52] task,restore: panic on file broken --- pkg/restore/util.go | 4 ++-- pkg/task/restore.go | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 96c4d8ae8..d9ce7eceb 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -180,13 +180,13 @@ func MapTableToFiles(files []*backup.File) map[int64][]*backup.File { tableID := tablecodec.DecodeTableID(file.GetStartKey()) tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) if tableID != tableEndID { - log.Error("key range spread between many files.", + log.Panic("key range spread between many files.", zap.String("file name", file.Name), zap.Binary("start key", file.GetStartKey()), zap.Binary("end key", file.GetEndKey())) } if tableID == 0 { - log.Error("invalid table key of file", + log.Panic("invalid table key of file", zap.String("file name", file.Name), zap.Binary("start key", file.GetStartKey()), zap.Binary("end key", file.GetEndKey())) diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 7fade6b0f..1263d217c 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -265,7 +265,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf ctx, afterRestoreStream, mgr.GetTiKV().GetClient(), errCh, updateCh) } else { // when user skip checksum, just collect tables, and drop them. - finish = dropToBlackhole(ctx, afterRestoreStream, errCh) + finish = dropToBlackhole(ctx, afterRestoreStream, errCh, updateCh) } select { @@ -274,7 +274,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf case <-finish: } - // If any error happened, return now, don't execute checksum. + // If any error happened, return now. 
if err != nil { return err } @@ -289,6 +289,7 @@ func dropToBlackhole( ctx context.Context, tableStream <-chan restore.CreatedTable, errCh chan<- error, + updateCh glue.Progress, ) <-chan struct{} { outCh := make(chan struct{}, 1) go func() { @@ -304,6 +305,7 @@ func dropToBlackhole( if !ok { return } + updateCh.Inc() log.Info("skipping checksum of table because user config", zap.Stringer("database", tbl.OldTable.Db.Name), zap.Stringer("table", tbl.Table.Name), From a31e44c6f350423e9c0032a2d2112806a6b91245 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 5 Jun 2020 14:38:01 +0800 Subject: [PATCH 37/52] restore: record tiflash count to disk when removed --- pkg/restore/client.go | 117 +++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 53 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 68b1a105c..171e25510 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" + "go.uber.org/multierr" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/backoff" @@ -65,9 +66,16 @@ type Client struct { isOnline bool noSchema bool hasSpeedLimited bool + // Those fields should be removed after we have FULLY supportted TiFlash. + // we place this field here to make a 'good' memory align, but mainly make golang-ci happy :) + tiFlashRecordUpdated bool restoreStores []uint64 + // tables that has TiFlash and those TiFlash have been removed, should be written to disk. + // Those fields should be removed after we have FULLY supportted TiFlash. + tablesRemovedTiFlash []*backup.Schema + storage storage.ExternalStorage backend *backup.StorageBackend } @@ -414,61 +422,27 @@ func (rc *Client) GoCreateTables( return outCh } -// RemoveTiFlashReplica removes all the tiflash replicas of a table -// TODO: remove this after tiflash supports restore. -func (rc *Client) RemoveTiFlashReplica( - tables []*utils.Table, newTables []*model.TableInfo, placementRules []placement.Rule) error { - schemas := make([]*backup.Schema, 0, len(tables)) - var updateReplica bool - // must use new table id to search placement rules - // here newTables and tables must have same order - for i, table := range tables { - if rule := utils.SearchPlacementRule(newTables[i].ID, placementRules, placement.Learner); rule != nil { - table.TiFlashReplicas = rule.Count - updateReplica = true - } - tableData, err := json.Marshal(newTables[i]) - if err != nil { - return errors.Trace(err) - } - dbData, err := json.Marshal(table.Db) - if err != nil { - return errors.Trace(err) - } - schemas = append(schemas, &backup.Schema{ - Db: dbData, - Table: tableData, - Crc64Xor: table.Crc64Xor, - TotalKvs: table.TotalKvs, - TotalBytes: table.TotalBytes, - TiflashReplicas: uint32(table.TiFlashReplicas), - }) +// makeTiFlashOfTableRecord make a 'record' repsenting TiFlash of a table that has been removed. +// We doesn't record table ID here because when restore TiFlash replicas, +// we use `ALTER TABLE db.tbl SET TIFLASH_REPLICA = xxx` DDL, instead of use some internal TiDB API. 
+func makeTiFlashOfTableRecord(table *utils.Table, replica int) (*backup.Schema, error) { + tableData, err := json.Marshal(table.Info) + if err != nil { + return nil, errors.Trace(err) } - - if updateReplica { - // Update backup meta - rc.backupMeta.Schemas = schemas - backupMetaData, err := proto.Marshal(rc.backupMeta) - if err != nil { - return errors.Trace(err) - } - backendURL := storage.FormatBackendURL(rc.backend) - log.Info("update backup meta", zap.Stringer("path", &backendURL)) - err = rc.storage.Write(rc.ctx, utils.SavedMetaFile, backupMetaData) - if err != nil { - return errors.Trace(err) - } + dbData, err := json.Marshal(table.Db) + if err != nil { + return nil, errors.Trace(err) } - - for _, table := range tables { - if table.TiFlashReplicas > 0 { - err := rc.db.AlterTiflashReplica(rc.ctx, table, 0) - if err != nil { - return errors.Trace(err) - } - } + result := &backup.Schema{ + Db: dbData, + Table: tableData, + Crc64Xor: table.Crc64Xor, + TotalKvs: table.TotalKvs, + TotalBytes: table.TotalBytes, + TiflashReplicas: uint32(replica), } - return nil + return result, nil } // RemoveTiFlashOfTable removes TiFlash replica of some table, @@ -478,7 +452,11 @@ func (rc *Client) RemoveTiFlashReplica( func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule) (int, error) { if rule := utils.SearchPlacementRule(table.Table.ID, rule, placement.Learner); rule != nil { if rule.Count > 0 { - err := rc.db.AlterTiflashReplica(rc.ctx, table.OldTable, 0) + err := multierr.Combine( + rc.db.AlterTiflashReplica(rc.ctx, table.OldTable, 0), + rc.removeTiFlashOf(table.OldTable, rule.Count), + rc.flushTiFlashRecord(), + ) if err != nil { return 0, errors.Trace(err) } @@ -488,6 +466,39 @@ func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule return 0, nil } +func (rc *Client) removeTiFlashOf(table *utils.Table, replica int) error { + tableRecord, err := makeTiFlashOfTableRecord(table, replica) + if err != nil { + return err + } + rc.tablesRemovedTiFlash = append(rc.tablesRemovedTiFlash, tableRecord) + rc.tiFlashRecordUpdated = true + return nil +} + +func (rc *Client) flushTiFlashRecord() error { + // Today nothing to do :D + if !rc.tiFlashRecordUpdated { + return nil + } + + // should we make a deep copy here? + // currently, write things directly to backup meta is OK since there seems nobody uses it. + // But would it be better if we don't do it? + rc.backupMeta.Schemas = rc.tablesRemovedTiFlash + backupMetaData, err := proto.Marshal(rc.backupMeta) + if err != nil { + return errors.Trace(err) + } + backendURL := storage.FormatBackendURL(rc.backend) + log.Info("update backup meta", zap.Stringer("path", &backendURL)) + err = rc.storage.Write(rc.ctx, utils.SavedMetaFile, backupMetaData) + if err != nil { + return errors.Trace(err) + } + return nil +} + // RecoverTiFlashOfTable recovers TiFlash replica of some table. // TODO: remove this after tiflash supports restore. 
func (rc *Client) RecoverTiFlashOfTable(table *utils.Table) error { From 88c8117b13e510b8387e91ef1b869c9118b29434 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Tue, 9 Jun 2020 21:55:02 +0800 Subject: [PATCH 38/52] restore,task: simplify some code, --- pkg/restore/batcher.go | 20 ++++++++++++++++++++ pkg/restore/pipeline_items.go | 11 ++++++++++- pkg/task/restore.go | 14 ++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index 4a19ebf0d..d104e7145 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -197,9 +197,19 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { for _, t := range tbs { tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) } + totalKV := uint64(0) + totalSize := uint64(0) + for _, r := range ranges { + for _, f := range r.Files { + totalKV += f.GetTotalKvs() + totalSize += f.GetTotalBytes() + } + } log.Debug("do batch send", zap.Strings("tables", tableNames), zap.Int("ranges", len(ranges)), + zap.Uint64("total kv", totalKV), + zap.Uint64("total size", totalSize), ) if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { @@ -211,6 +221,16 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { return nil, err } + blankTableNames := make([]string, 0, len(tbs)) + for _, t := range drainResult.BlankTablesAfterSend { + blankTableNames = append(blankTableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + if len(blankTableNames) > 0 { + log.Debug("table fully restored", + zap.Strings("tables", blankTableNames), + zap.Int("ranges", len(ranges)), + ) + } return drainResult.BlankTablesAfterSend, nil } diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 233864f2d..87ebec8e7 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -189,9 +189,18 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, rew if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { return err } + totalKV := uint64(0) + totalSize := uint64(0) + for _, f := range files { + totalKV += f.GetTotalKvs() + totalSize += f.GetTotalBytes() + } + log.Debug("send batch done", - zap.Int("range count", len(ranges)), + zap.Int("ranges", len(ranges)), zap.Int("file count", len(files)), + zap.Uint64("total kv", totalKV), + zap.Uint64("total size", totalSize), ) return nil diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 1263d217c..fcf06fdfd 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -8,6 +8,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/log" "github.com/pingcap/pd/v4/server/schedule/placement" @@ -219,14 +220,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers - shouldRestorePostWork := true - restorePostWork := func() { - if shouldRestorePostWork { - shouldRestorePostWork = false - restorePostWork(ctx, client, mgr, clusterCfg) - } - } - defer restorePostWork() + defer restorePostWork(ctx, client, mgr, clusterCfg) // Do not reset timestamp if we are doing incremental restore, because // we are not allowed to decrease timestamp. 
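The hunk above replaces the hand-rolled shouldRestorePostWork guard with a single deferred restorePostWork call, since the early-invocation path is gone. A minimal stand-alone sketch (plain Go, not BR's code) of the same run-exactly-once behaviour using sync.Once, in case an early-cleanup path ever returns:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var once sync.Once
	postWork := func() { fmt.Println("restore post-work executed") }

	// An early error path may trigger the cleanup ahead of time...
	once.Do(postWork)
	// ...while the deferred call still guarantees it runs, but never twice.
	defer once.Do(postWork)

	fmt.Println("restore pipeline finished")
}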
@@ -239,6 +233,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
 	// Restore sst files in batch.
 	batchSize := utils.ClampInt(int(cfg.Concurrency), defaultRestoreConcurrency, maxRestoreBatchSizeLimit)
+	failpoint.Inject("small-batch-size", func(v failpoint.Value) {
+		log.Info("failpoint small batch size is on", zap.Int("size", v.(int)))
+		batchSize = v.(int)
+	})
 
 	// Redirect to log if there is no log file to avoid unreadable output.
 	updateCh := g.StartProgress(

From cc43d9d655dde5b9ea523603f6d9a8022ed3a5e1 Mon Sep 17 00:00:00 2001
From: yujuncen
Date: Tue, 9 Jun 2020 22:49:59 +0800
Subject: [PATCH 39/52] task,restore: fix a bug

The bug caused checksum to sometimes fail when a single table was split
into multiple batches.
---
 pkg/restore/batcher.go             | 118 +++++++++++++++++++++--------
 pkg/restore/batcher_test.go        |  46 +++++------
 pkg/restore/pipeline_items.go      |   9 ---
 pkg/task/restore.go                |   6 +-
 tests/br_small_batch_size/run.sh   |  56 ++++++++++++++
 tests/br_small_batch_size/workload |  12 +++
 6 files changed, 179 insertions(+), 68 deletions(-)
 create mode 100755 tests/br_small_batch_size/run.sh
 create mode 100644 tests/br_small_batch_size/workload

diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go
index d104e7145..8ebe9281f 100644
--- a/pkg/restore/batcher.go
+++ b/pkg/restore/batcher.go
@@ -21,10 +21,22 @@ type Batcher struct {
 	cachedTablesMu *sync.Mutex
 	rewriteRules   *RewriteRules
 
-	// joiner is for joining the background batch sender.
-	joiner  chan<- struct{}
-	sendErr chan<- error
-	outCh   chan<- CreatedTable
+	// autoCommitJoiner is for joining the background batch sender.
+	autoCommitJoiner chan<- struct{}
+	// workerJoiner is also for joining the background batch sender.
+	workerJoiner chan<- struct{}
+	// workerIsDone is for waiting for worker done: that is, after we send a
+	// signal to workerJoiner, we must give it enough time to get things done.
+	// Then, it should notify us by this waitgroup.
+	// Use waitgroup instead of a trivial channel for farther extention.
+	everythingIsDone *sync.WaitGroup
+	// sendErr is for output error information.
+	sendErr chan<- error
+	// sendCh is for communiate with sendWorker.
+	sendCh chan<- struct{}
+	// outCh is for output the restored table, so it can be sent to do something like checksum.
+	outCh chan<- CreatedTable
+
 	sender             BatchSender
 	manager            ContextManager
 	batchSizeThreshold int
@@ -40,53 +52,95 @@ func (b *Batcher) Len() int {
 // this batcher will work background, send batches per second, or batch size reaches limit.
 // and it will emit full-restored tables to the output channel returned.
 func NewBatcher(
+	ctx context.Context,
 	sender BatchSender,
 	manager ContextManager,
 	errCh chan<- error,
 ) (*Batcher, <-chan CreatedTable) {
 	output := make(chan CreatedTable, defaultBatcherOutputChannelSize)
+	workerJoiner := make(chan struct{})
+	sendChan := make(chan struct{}, 2)
 	b := &Batcher{
 		rewriteRules:       EmptyRewriteRule(),
 		sendErr:            errCh,
 		outCh:              output,
 		sender:             sender,
 		manager:            manager,
+		sendCh:             sendChan,
+		workerJoiner:       workerJoiner,
 		cachedTablesMu:     new(sync.Mutex),
+		everythingIsDone:   new(sync.WaitGroup),
 		batchSizeThreshold: 1,
 	}
+	b.everythingIsDone.Add(1)
+	go b.sendWorker(ctx, sendChan, workerJoiner)
 	return b, output
 }
 
 // EnableAutoCommit enables the batcher commit batch periodicity even batcher size isn't big enough.
 // we make this function for disable AutoCommit in some case.
func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { - if b.joiner != nil { - log.Warn("enable auto commit on a batcher that is enabled auto commit, nothing will happen") - log.Info("if desire, please disable auto commit firstly") + if b.autoCommitJoiner != nil { + log.Warn("enable auto commit on a batcher that auto commit is enabled, nothing will happen") + log.Info("if desire(e.g. change the peroid of auto commit), please disable auto commit firstly") } joiner := make(chan struct{}) - go b.workLoop(ctx, joiner, delay) - b.joiner = joiner + go b.autoCommitWorker(ctx, joiner, delay) + b.autoCommitJoiner = joiner } // DisableAutoCommit blocks the current goroutine until the worker can gracefully stop, // and then disable auto commit. -func (b *Batcher) DisableAutoCommit(ctx context.Context) { +func (b *Batcher) DisableAutoCommit() { b.joinWorker() - b.joiner = nil + b.autoCommitJoiner = nil +} + +func (b *Batcher) waitUntilSendDone() { + b.workerJoiner <- struct{}{} + b.everythingIsDone.Wait() } // joinWorker blocks the current goroutine until the worker can gracefully stop. // return immediately when auto commit disabled. func (b *Batcher) joinWorker() { - if b.joiner != nil { + if b.autoCommitJoiner != nil { log.Debug("gracefully stoping worker goroutine") - b.joiner <- struct{}{} + b.autoCommitJoiner <- struct{}{} log.Debug("gracefully stopped worker goroutine") } } -func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { +// sendWorker is the 'worker' that send all ranges to TiKV. +func (b *Batcher) sendWorker(ctx context.Context, send <-chan struct{}, joiner <-chan struct{}) { + doSend := func() { + if b.Len() > 0 { + tbls, err := b.Send(ctx) + if err != nil { + b.sendErr <- err + return + } + for _, t := range tbls { + b.outCh <- t + } + } + } + + for { + select { + case <-send: + doSend() + case <-joiner: + for b.Len() > 0 { + doSend() + } + b.everythingIsDone.Done() + return + } + } +} + +func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { tick := time.NewTicker(delay) defer tick.Stop() for { @@ -100,21 +154,14 @@ func (b *Batcher) workLoop(ctx context.Context, joiner <-chan struct{}, delay ti case <-tick.C: if b.Len() > 0 { log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) - b.asyncSend(ctx) + b.asyncSend() } } } } -func (b *Batcher) asyncSend(ctx context.Context) { - tbls, err := b.Send(ctx) - if err != nil { - b.sendErr <- err - return - } - for _, t := range tbls { - b.outCh <- t - } +func (b *Batcher) asyncSend() { + b.sendCh <- struct{}{} } type drainResult struct { @@ -234,15 +281,15 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { return drainResult.BlankTablesAfterSend, nil } -func (b *Batcher) sendIfFull(ctx context.Context) { +func (b *Batcher) sendIfFull() { for b.Len() >= b.batchSizeThreshold { log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend(ctx) + b.asyncSend() } } // Add adds a task to the Batcher. 
-func (b *Batcher) Add(ctx context.Context, tbs TableWithRange) { +func (b *Batcher) Add(tbs TableWithRange) { b.cachedTablesMu.Lock() log.Debug("adding table to batch", zap.Stringer("table", tbs.Table.Name), @@ -257,16 +304,21 @@ func (b *Batcher) Add(ctx context.Context, tbs TableWithRange) { atomic.AddInt32(&b.size, int32(len(tbs.Range))) b.cachedTablesMu.Unlock() - b.sendIfFull(ctx) + b.sendIfFull() } // Close closes the batcher, sending all pending requests, close updateCh. -func (b *Batcher) Close(ctx context.Context) { +func (b *Batcher) Close() { log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) - for b.Len() > 0 { - b.asyncSend(ctx) - } - b.DisableAutoCommit(ctx) + b.DisableAutoCommit() + b.waitUntilSendDone() close(b.outCh) b.sender.Close() } + +// SetThreshold sets the threshold that how big the batch size reaching need to send batch. +// note this function isn't goroutine safe yet, +// just set threshold before anything starts(e.g. EnableAutoCommit), please. +func (b *Batcher) SetThreshold(newThreshold int) { + b.batchSizeThreshold = newThreshold +} diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 8d0d114b5..aaab9496c 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -170,10 +170,11 @@ func join(nested [][]rtree.Range) (plain []rtree.Range) { // TestBasic tests basic workflow of batcher. func (*testBatcherSuite) TestBasic(c *C) { + ctx := context.Background() errCh := make(chan error, 8) sender := newDrySender() manager := newMockManager() - batcher, _ := restore.NewBatcher(sender, manager, errCh) + batcher, _ := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(2) tableRanges := [][]rtree.Range{ @@ -186,12 +187,11 @@ func (*testBatcherSuite) TestBasic(c *C) { for i, ranges := range tableRanges { simpleTables = append(simpleTables, fakeTableWithRange(int64(i), ranges)) } - ctx := context.Background() for _, tbl := range simpleTables { - batcher.Add(ctx, tbl) + batcher.Add(tbl) } - batcher.Close(ctx) + batcher.Close() rngs := sender.Ranges() c.Assert(join(tableRanges), DeepEquals, rngs) @@ -203,16 +203,16 @@ func (*testBatcherSuite) TestBasic(c *C) { } func (*testBatcherSuite) TestAutoSend(c *C) { + ctx := context.Background() errCh := make(chan error, 8) sender := newDrySender() manager := newMockManager() - batcher, _ := restore.NewBatcher(sender, manager, errCh) + batcher, _ := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(1024) simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) - ctx := context.Background() - batcher.Add(ctx, simpleTable) + batcher.Add(simpleTable) c.Assert(batcher.Len(), Greater, 0) // enable auto commit. 
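These tests now drive a batcher that flushes from a background worker goroutine rather than synchronously inside Add. A distilled, generic version of that pattern in plain Go — miniBatcher and all of its names are invented for illustration and are not BR's API:

package main

import (
	"fmt"
	"sync"
)

// miniBatcher queues items and flushes them on a background worker
// whenever the queue reaches the threshold, or when it is closed.
type miniBatcher struct {
	mu        sync.Mutex
	pending   []int
	threshold int
	kick      chan struct{}
	done      chan struct{}
}

func newMiniBatcher(threshold int) *miniBatcher {
	b := &miniBatcher{
		threshold: threshold,
		kick:      make(chan struct{}, 1),
		done:      make(chan struct{}),
	}
	go b.worker()
	return b
}

func (b *miniBatcher) worker() {
	defer close(b.done)
	for range b.kick {
		b.flush()
	}
	// The kick channel was closed: drain whatever is still pending.
	b.flush()
}

func (b *miniBatcher) flush() {
	b.mu.Lock()
	batch := b.pending
	b.pending = nil
	b.mu.Unlock()
	if len(batch) > 0 {
		fmt.Println("sending batch:", batch)
	}
}

func (b *miniBatcher) Add(x int) {
	b.mu.Lock()
	b.pending = append(b.pending, x)
	full := len(b.pending) >= b.threshold
	b.mu.Unlock()
	if full {
		select {
		case b.kick <- struct{}{}:
		default: // a flush is already scheduled.
		}
	}
}

func (b *miniBatcher) Close() {
	close(b.kick)
	<-b.done
}

func main() {
	b := newMiniBatcher(2)
	for i := 1; i <= 5; i++ {
		b.Add(i)
	}
	b.Close()
}

The real Batcher layers rewrite rules, a context manager and an auto-commit ticker on top of this skeleton, but the threshold-triggered kick and the drain-on-close are the same idea.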
@@ -222,7 +222,7 @@ func (*testBatcherSuite) TestAutoSend(c *C) { c.Assert(sender.RangeLen(), Greater, 0) c.Assert(batcher.Len(), Equals, 0) - batcher.Close(ctx) + batcher.Close() rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) @@ -234,10 +234,11 @@ func (*testBatcherSuite) TestAutoSend(c *C) { } func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { + ctx := context.Background() errCh := make(chan error, 8) sender := newDrySender() manager := newMockManager() - batcher, _ := restore.NewBatcher(sender, manager, errCh) + batcher, _ := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(2) simpleTable := fakeTableWithRange(1, []rtree.Range{ @@ -246,11 +247,10 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - ctx := context.TODO() - batcher.Add(ctx, simpleTable) + batcher.Add(simpleTable) c.Assert(sender.BatchCount(), Equals, 4) - batcher.Close(ctx) + batcher.Close() rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) @@ -283,22 +283,22 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { tables = append(tables, table) } - ctx := context.TODO() + ctx := context.Background() errCh := make(chan error, 8) sender := newDrySender() manager := newMockManager() - batcher, _ := restore.NewBatcher(sender, manager, errCh) + batcher, _ := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(2) - batcher.Add(ctx, tables[0]) + batcher.Add(tables[0]) c.Assert(sender.RangeLen(), Equals, 0) - batcher.Add(ctx, tables[1]) + batcher.Add(tables[1]) c.Assert(sender.HasRewriteRuleOfKey("a"), IsTrue) c.Assert(sender.HasRewriteRuleOfKey("b"), IsTrue) c.Assert(manager.Has(tables[1]), IsTrue) c.Assert(sender.RangeLen(), Equals, 2) - batcher.Add(ctx, tables[2]) - batcher.Close(ctx) + batcher.Add(tables[2]) + batcher.Close() c.Assert(sender.HasRewriteRuleOfKey("c"), IsTrue) c.Assert(sender.Ranges(), DeepEquals, join(tableRanges)) @@ -310,10 +310,11 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { } func (*testBatcherSuite) TestBatcherLen(c *C) { + ctx := context.Background() errCh := make(chan error, 8) sender := newDrySender() manager := newMockManager() - batcher, _ := restore.NewBatcher(sender, manager, errCh) + batcher, _ := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(15) simpleTable := fakeTableWithRange(1, []rtree.Range{ @@ -328,16 +329,15 @@ func (*testBatcherSuite) TestBatcherLen(c *C) { fakeRange("caj", "cak"), fakeRange("cal", "cam"), fakeRange("can", "cao"), fakeRange("cap", "caq")}) - ctx := context.TODO() - batcher.Add(ctx, simpleTable) + batcher.Add(simpleTable) c.Assert(batcher.Len(), Equals, 8) c.Assert(manager.Has(simpleTable), IsFalse) c.Assert(manager.Has(simpleTable2), IsFalse) - batcher.Add(ctx, simpleTable2) + batcher.Add(simpleTable2) c.Assert(batcher.Len(), Equals, 1) c.Assert(manager.Has(simpleTable2), IsTrue) c.Assert(manager.Has(simpleTable), IsFalse) - batcher.Close(ctx) + batcher.Close() c.Assert(batcher.Len(), Equals, 0) select { diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 87ebec8e7..6dc0c7ccc 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -116,13 +116,6 @@ type TableWithRange struct { Range []rtree.Range } -// SetThreshold sets the threshold that how big the batch size reaching need to send batch. -// note this function isn't goroutine safe yet, -// just set threshold before anything starts(e.g. 
EnableAutoCommit), please. -func (b *Batcher) SetThreshold(newThreshold int) { - b.batchSizeThreshold = newThreshold -} - // Exhaust drains all remaining errors in the channel, into a slice of errors. func Exhaust(ec <-chan error) []error { out := make([]error, 0, len(ec)) @@ -155,8 +148,6 @@ type tikvSender struct { func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (BatchSender, error) { tiflashStores, err := conn.GetAllTiKVStores(ctx, cli.GetPDClient(), conn.TiFlashOnly) if err != nil { - // After TiFlash support restore, we can remove this panic. - // The origin of this panic is at RunRestore, and its semantic is nearing panic, don't worry about it. log.Error("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) return nil, err } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index fcf06fdfd..d00172233 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -251,7 +251,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return err } manager := restore.NewBRContextManager(client) - batcher, afterRestoreStream := restore.NewBatcher(sender, manager, errCh) + batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(batchSize) batcher.EnableAutoCommit(ctx, time.Second) go restoreTableStream(ctx, rangeStream, placementRules, client, batcher, errCh) @@ -575,7 +575,7 @@ func restoreTableStream( oldTables := []*utils.Table{} defer func() { // when things done, we must clean pending requests. - batcher.Close(ctx) + batcher.Close() log.Info("doing postwork", zap.Int("table count", len(oldTables)), ) @@ -603,7 +603,7 @@ func restoreTableStream( t.OldTable.TiFlashReplicas = tiFlashRep oldTables = append(oldTables, t.OldTable) - batcher.Add(ctx, t) + batcher.Add(t) } } } diff --git a/tests/br_small_batch_size/run.sh b/tests/br_small_batch_size/run.sh new file mode 100755 index 000000000..4badfb926 --- /dev/null +++ b/tests/br_small_batch_size/run.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +random_values() { + length=$1 + count=$2 + python -c "import random; import string; [print(''.join(random.choices(string.ascii_letters, k=$length))) for _ in range($count) ]" | + awk '{print "(1" $1 "1)"}' | + tr "\n1" ",'" | + sed 's/,$//' +} + +create_and_insert() { + table_name=$1 + record_count=$2 + run_sql "CREATE TABLE $DB.$table_name(k varchar(256) primary key)" + stmt="INSERT INTO $DB.$table_name VALUES `random_values 255 $record_count`" + echo $stmt | mysql -uroot -h127.0.0.1 -P4000 +} + +set -eu +DB="$TEST_NAME" +TABLE="usertable" + +run_sql "CREATE DATABASE $DB;" + +create_and_insert t1 10000 +create_and_insert t2 10086 +create_and_insert t3 10010 +go-ycsb load mysql -P tests/$TEST_NAME/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB + + +echo "backup start..." 
+backup_dir="$TEST_DIR/${TEST_NAME}_backup" +rm -rf $backup_dir +run_br backup full -s "local://$backup_dir" --pd $PD_ADDR + +run_sql "drop database $DB" + + +echo "restore start..." +GO_FAILPOINTS="github.com/pingcap/br/pkg/task/small-batch-size=return(2)" \ +run_br restore full -s "local://$backup_dir" --pd $PD_ADDR --ratelimit 1024 + diff --git a/tests/br_small_batch_size/workload b/tests/br_small_batch_size/workload new file mode 100644 index 000000000..caba5e1ca --- /dev/null +++ b/tests/br_small_batch_size/workload @@ -0,0 +1,12 @@ +recordcount=30000 +operationcount=0 +workload=core + +readallfields=true + +readproportion=0 +updateproportion=0 +scanproportion=0 +insertproportion=0 + +requestdistribution=uniform \ No newline at end of file From 312039d7ada8cb7c38b89a8b4ed34b40753c5b18 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Tue, 9 Jun 2020 23:37:35 +0800 Subject: [PATCH 40/52] restore: some factory and fix 1. make the batcher worker has two send style 2. make functions for debuging tables and ranges 3. rewrite a test case to adapt the new batcher --- pkg/restore/batcher.go | 81 ++++++++++++++++++----------------- pkg/restore/batcher_test.go | 15 ++++++- pkg/restore/pipeline_items.go | 14 ++---- pkg/restore/util.go | 29 +++++++++++++ 4 files changed, 88 insertions(+), 51 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index 8ebe9281f..955f9ebd9 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -4,7 +4,6 @@ package restore import ( "context" - "fmt" "sync" "sync/atomic" "time" @@ -15,6 +14,19 @@ import ( "github.com/pingcap/br/pkg/rtree" ) +// SendType is the 'type' of a send. +// when we make a 'send' command to worker, we may want to flush all penging ranges(when auto commit enabled), +// or, we just want to clean overflowing ranges(when just adding a table to batcher). +type SendType int + +const ( + // SendUntilLessThanBatch will make the batcher send batch until + // its remaining range is less than its batchSizeThreshold. + SendUntilLessThanBatch SendType = iota + // SendAll will make the batcher send all pending ranges. + SendAll +) + // Batcher collects ranges to restore and send batching split/ingest request. type Batcher struct { cachedTables []TableWithRange @@ -28,12 +40,12 @@ type Batcher struct { // workerIsDone is for waiting for worker done: that is, after we send a // signal to workerJoiner, we must give it enough time to get things done. // Then, it should notify us by this waitgroup. - // Use waitgroup instead of a trivial channel for farther extention. + // Use waitgroup instead of a trivial channel for farther extension. everythingIsDone *sync.WaitGroup // sendErr is for output error information. sendErr chan<- error // sendCh is for communiate with sendWorker. - sendCh chan<- struct{} + sendCh chan<- SendType // outCh is for output the restored table, so it can be sent to do something like checksum. 
outCh chan<- CreatedTable @@ -59,7 +71,7 @@ func NewBatcher( ) (*Batcher, <-chan CreatedTable) { output := make(chan CreatedTable, defaultBatcherOutputChannelSize) workerJoiner := make(chan struct{}) - sendChan := make(chan struct{}, 2) + sendChan := make(chan SendType, 2) b := &Batcher{ rewriteRules: EmptyRewriteRule(), sendErr: errCh, @@ -98,6 +110,7 @@ func (b *Batcher) DisableAutoCommit() { func (b *Batcher) waitUntilSendDone() { b.workerJoiner <- struct{}{} + close(b.workerJoiner) b.everythingIsDone.Wait() } @@ -107,14 +120,15 @@ func (b *Batcher) joinWorker() { if b.autoCommitJoiner != nil { log.Debug("gracefully stoping worker goroutine") b.autoCommitJoiner <- struct{}{} + close(b.autoCommitJoiner) log.Debug("gracefully stopped worker goroutine") } } // sendWorker is the 'worker' that send all ranges to TiKV. -func (b *Batcher) sendWorker(ctx context.Context, send <-chan struct{}, joiner <-chan struct{}) { - doSend := func() { - if b.Len() > 0 { +func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType, joiner <-chan struct{}) { + sendUntil := func(lessOrEqual int) { + for b.Len() > lessOrEqual { tbls, err := b.Send(ctx) if err != nil { b.sendErr <- err @@ -128,12 +142,15 @@ func (b *Batcher) sendWorker(ctx context.Context, send <-chan struct{}, joiner < for { select { - case <-send: - doSend() - case <-joiner: - for b.Len() > 0 { - doSend() + case sendType := <-send: + switch sendType { + case SendUntilLessThanBatch: + sendUntil(b.batchSizeThreshold) + case SendAll: + sendUntil(0) } + case <-joiner: + sendUntil(0) b.everythingIsDone.Done() return } @@ -154,14 +171,14 @@ func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, case <-tick.C: if b.Len() > 0 { log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) - b.asyncSend() + b.asyncSend(SendAll) } } } } -func (b *Batcher) asyncSend() { - b.sendCh <- struct{}{} +func (b *Batcher) asyncSend(t SendType) { + b.sendCh <- t } type drainResult struct { @@ -240,23 +257,12 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { drainResult := b.drainRanges() tbs := drainResult.TablesToSend ranges := drainResult.Ranges - tableNames := make([]string, 0, len(tbs)) - for _, t := range tbs { - tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - totalKV := uint64(0) - totalSize := uint64(0) - for _, r := range ranges { - for _, f := range r.Files { - totalKV += f.GetTotalKvs() - totalSize += f.GetTotalBytes() - } - } + log.Debug("do batch send", - zap.Strings("tables", tableNames), - zap.Int("ranges", len(ranges)), - zap.Uint64("total kv", totalKV), - zap.Uint64("total size", totalSize), + append( + DebugRanges(ranges), + DebugTables(tbs), + )..., ) if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { @@ -268,13 +274,9 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { return nil, err } - blankTableNames := make([]string, 0, len(tbs)) - for _, t := range drainResult.BlankTablesAfterSend { - blankTableNames = append(blankTableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) - } - if len(blankTableNames) > 0 { + if len(drainResult.BlankTablesAfterSend) > 0 { log.Debug("table fully restored", - zap.Strings("tables", blankTableNames), + DebugTables(drainResult.BlankTablesAfterSend), zap.Int("ranges", len(ranges)), ) } @@ -282,9 +284,9 @@ func (b *Batcher) Send(ctx context.Context) 
([]CreatedTable, error) { } func (b *Batcher) sendIfFull() { - for b.Len() >= b.batchSizeThreshold { + if b.Len() >= b.batchSizeThreshold { log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend() + b.asyncSend(SendUntilLessThanBatch) } } @@ -313,6 +315,7 @@ func (b *Batcher) Close() { b.DisableAutoCommit() b.waitUntilSendDone() close(b.outCh) + close(b.sendCh) b.sender.Close() } diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index aaab9496c..528c0fa7b 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -39,6 +39,7 @@ func (d *drySender) RestoreBatch( ) error { d.mu.Lock() defer d.mu.Unlock() + log.Info("fake restore range", restore.DebugRanges(ranges)...) d.nBatch++ d.rewriteRules.Append(*rewriteRules) d.ranges = append(d.ranges, ranges...) @@ -47,6 +48,10 @@ func (d *drySender) RestoreBatch( func (d *drySender) Close() {} +func waitForSend() { + time.Sleep(10 * time.Millisecond) +} + func (d *drySender) Ranges() []rtree.Range { return d.ranges } @@ -248,9 +253,8 @@ func (*testBatcherSuite) TestSplitRangeOnSameTable(c *C) { fakeRange("can", "cao"), fakeRange("cap", "caq")}) batcher.Add(simpleTable) - c.Assert(sender.BatchCount(), Equals, 4) - batcher.Close() + c.Assert(sender.BatchCount(), Equals, 4) rngs := sender.Ranges() c.Assert(rngs, DeepEquals, simpleTable.Range) @@ -291,12 +295,16 @@ func (*testBatcherSuite) TestRewriteRules(c *C) { batcher.SetThreshold(2) batcher.Add(tables[0]) + waitForSend() c.Assert(sender.RangeLen(), Equals, 0) + batcher.Add(tables[1]) + waitForSend() c.Assert(sender.HasRewriteRuleOfKey("a"), IsTrue) c.Assert(sender.HasRewriteRuleOfKey("b"), IsTrue) c.Assert(manager.Has(tables[1]), IsTrue) c.Assert(sender.RangeLen(), Equals, 2) + batcher.Add(tables[2]) batcher.Close() c.Assert(sender.HasRewriteRuleOfKey("c"), IsTrue) @@ -330,10 +338,13 @@ func (*testBatcherSuite) TestBatcherLen(c *C) { fakeRange("can", "cao"), fakeRange("cap", "caq")}) batcher.Add(simpleTable) + waitForSend() c.Assert(batcher.Len(), Equals, 8) c.Assert(manager.Has(simpleTable), IsFalse) c.Assert(manager.Has(simpleTable2), IsFalse) + batcher.Add(simpleTable2) + waitForSend() c.Assert(batcher.Len(), Equals, 1) c.Assert(manager.Has(simpleTable2), IsTrue) c.Assert(manager.Has(simpleTable), IsFalse) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 6dc0c7ccc..60bd4073c 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -180,18 +180,12 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, rew if err := b.client.RestoreFiles(files, rewriteRules, b.rejectStoreMap, b.updateCh); err != nil { return err } - totalKV := uint64(0) - totalSize := uint64(0) - for _, f := range files { - totalKV += f.GetTotalKvs() - totalSize += f.GetTotalBytes() - } log.Debug("send batch done", - zap.Int("ranges", len(ranges)), - zap.Int("file count", len(files)), - zap.Uint64("total kv", totalKV), - zap.Uint64("total size", totalSize), + append( + DebugRanges(ranges), + zap.Int("file count", len(files)), + )..., ) return nil diff --git a/pkg/restore/util.go b/pkg/restore/util.go index d9ce7eceb..3e831cc13 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -6,6 +6,7 @@ import ( "bytes" "context" "encoding/hex" + "fmt" "strings" "time" @@ -19,6 +20,7 @@ import ( "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" "go.uber.org/zap" + "go.uber.org/zap/zapcore" "github.com/pingcap/br/pkg/glue" "github.com/pingcap/br/pkg/rtree" 
@@ -537,3 +539,30 @@ func waitForRemoveRejectStores( return false } + +// DebugTables make zap field of table for debuging, including table names. +func DebugTables(tables []CreatedTable) zapcore.Field { + tableNames := make([]string, 0, len(tables)) + for _, t := range tables { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) + } + return zap.Strings("tables", tableNames) +} + +// DebugRanges make zap fields for debuging, which contains kv, size and count of ranges. +func DebugRanges(ranges []rtree.Range) []zapcore.Field { + totalKV := uint64(0) + totalSize := uint64(0) + for _, r := range ranges { + for _, f := range r.Files { + totalKV += f.GetTotalKvs() + totalSize += f.GetTotalBytes() + } + } + + return []zap.Field{ + zap.Int("ranges", len(ranges)), + zap.Uint64("total kv", totalKV), + zap.Uint64("total size", totalSize), + } +} From bbdbecd77a7410bbe6938de2e4d259a11bff1c4d Mon Sep 17 00:00:00 2001 From: yujuncen Date: Tue, 9 Jun 2020 23:52:10 +0800 Subject: [PATCH 41/52] tests: try to fix CI --- tests/br_small_batch_size/run.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/br_small_batch_size/run.sh b/tests/br_small_batch_size/run.sh index 4badfb926..8af742360 100755 --- a/tests/br_small_batch_size/run.sh +++ b/tests/br_small_batch_size/run.sh @@ -16,10 +16,14 @@ random_values() { length=$1 count=$2 - python -c "import random; import string; [print(''.join(random.choices(string.ascii_letters, k=$length))) for _ in range($count) ]" | - awk '{print "(1" $1 "1)"}' | - tr "\n1" ",'" | - sed 's/,$//' + python -c " +import random +import string +for ignored in range($count): + print(''.join(random.choices(string.ascii_letters, k=$length)))" | + awk '{print "(1" $1 "1)"}' | + tr "\n1" ",'" | + sed 's/,$//' } create_and_insert() { From 2c269dc87dff9c3d78ac3ac63c68a35132a87c85 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Wed, 10 Jun 2020 00:22:56 +0800 Subject: [PATCH 42/52] tests: try to fix CI, again --- tests/br_small_batch_size/run.sh | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/br_small_batch_size/run.sh b/tests/br_small_batch_size/run.sh index 8af742360..aea469fac 100755 --- a/tests/br_small_batch_size/run.sh +++ b/tests/br_small_batch_size/run.sh @@ -20,7 +20,7 @@ random_values() { import random import string for ignored in range($count): - print(''.join(random.choices(string.ascii_letters, k=$length)))" | + print(''.join(random.choice(string.ascii_letters) for _ in range($length)))" | awk '{print "(1" $1 "1)"}' | tr "\n1" ",'" | sed 's/,$//' @@ -34,15 +34,27 @@ create_and_insert() { echo $stmt | mysql -uroot -h127.0.0.1 -P4000 } +check_size() { + table_name=$1 + record_count=$2 + + count=`run_sql 'select count(*) from $DB.$table_name' | awk '/count/{print $2}'` + + if [ $count -ne $record_count ]; then + echo "check size failed: $count vs $record_count" + fi +} + set -eu DB="$TEST_NAME" TABLE="usertable" run_sql "CREATE DATABASE $DB;" -create_and_insert t1 10000 -create_and_insert t2 10086 -create_and_insert t3 10010 +record_counts=(10000 10010 10086) +for i in $record_counts; do + create_and_insert "t$i" $i +done go-ycsb load mysql -P tests/$TEST_NAME/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB @@ -58,3 +70,7 @@ echo "restore start..." 
GO_FAILPOINTS="github.com/pingcap/br/pkg/task/small-batch-size=return(2)" \ run_br restore full -s "local://$backup_dir" --pd $PD_ADDR --ratelimit 1024 +for i in $record_counts; do + check_size "t$i" $i +done +check_size $TABLE 10000 From 55d22e402248dc822af5444fe134a54d3bcb7b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 12 Jun 2020 12:05:24 +0800 Subject: [PATCH 43/52] Apply suggestions from code review Co-authored-by: 3pointer --- pkg/restore/client.go | 1 + pkg/task/restore.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 171e25510..a45c05ab5 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -750,6 +750,7 @@ func (rc *Client) GoValidateChecksum( go func() { defer func() { log.Info("all checksum ended") + wg.Wait() outCh <- struct{}{} close(outCh) }() diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 143b521f0..4ef0afc39 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -239,7 +239,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf ctx, cmdName, // Split/Scatter + Download/Ingest + Checksum - int64(restore.EstimateRangeSize(files)+len(files)+len(tables)), + int64(rangeSize+len(files)+len(tables)), !cfg.LogProgress) defer updateCh.Close() sender, err := restore.NewTiKVSender(ctx, client, updateCh) From c94435d09e46dfc7e6768e568f1ffe80bb392c07 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 12 Jun 2020 13:00:27 +0800 Subject: [PATCH 44/52] restore: change some log levels --- pkg/restore/batcher.go | 8 ++++---- pkg/restore/batcher_test.go | 2 +- pkg/restore/client.go | 3 +-- pkg/restore/pipeline_items.go | 7 ++++--- pkg/restore/util.go | 8 ++++---- tests/br_db_online_newkv/run.sh | 1 + 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index 955f9ebd9..c0dd702ed 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -258,10 +258,10 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { tbs := drainResult.TablesToSend ranges := drainResult.Ranges - log.Debug("do batch send", + log.Info("restore batch start", append( - DebugRanges(ranges), - DebugTables(tbs), + ZapRanges(ranges), + ZapTables(tbs), )..., ) @@ -276,7 +276,7 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { } if len(drainResult.BlankTablesAfterSend) > 0 { log.Debug("table fully restored", - DebugTables(drainResult.BlankTablesAfterSend), + ZapTables(drainResult.BlankTablesAfterSend), zap.Int("ranges", len(ranges)), ) } diff --git a/pkg/restore/batcher_test.go b/pkg/restore/batcher_test.go index 528c0fa7b..53a9fbbaa 100644 --- a/pkg/restore/batcher_test.go +++ b/pkg/restore/batcher_test.go @@ -39,7 +39,7 @@ func (d *drySender) RestoreBatch( ) error { d.mu.Lock() defer d.mu.Unlock() - log.Info("fake restore range", restore.DebugRanges(ranges)...) + log.Info("fake restore range", restore.ZapRanges(ranges)...) d.nBatch++ d.rewriteRules.Append(*rewriteRules) d.ranges = append(d.ranges, ranges...) 
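The GoValidateChecksum changes in the surrounding patches move wg.Wait() so that the "all checksum ended" signal can only fire after every checksum goroutine has returned. A self-contained sketch of that ordering in plain Go — closeAfterWorkers and its job channel are invented names, not BR's API:

package main

import (
	"fmt"
	"sync"
)

// closeAfterWorkers consumes jobs, fans each one out to a goroutine,
// and signals completion only after every worker has returned.
func closeAfterWorkers(jobs <-chan int) <-chan struct{} {
	outCh := make(chan struct{}, 1)
	go func() {
		wg := new(sync.WaitGroup)
		defer func() {
			wg.Wait() // must happen before the completion signal.
			outCh <- struct{}{}
			close(outCh)
		}()
		for j := range jobs {
			wg.Add(1)
			go func(j int) {
				defer wg.Done()
				fmt.Println("checked", j)
			}(j)
		}
	}()
	return outCh
}

func main() {
	jobs := make(chan int)
	go func() {
		for i := 0; i < 3; i++ {
			jobs <- i
		}
		close(jobs)
	}()
	<-closeAfterWorkers(jobs)
	fmt.Println("all checks ended")
}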
diff --git a/pkg/restore/client.go b/pkg/restore/client.go index a45c05ab5..f6e68c79a 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -748,20 +748,19 @@ func (rc *Client) GoValidateChecksum( outCh := make(chan struct{}, 1) workers := utils.NewWorkerPool(defaultChecksumConcurrency, "RestoreChecksum") go func() { + wg := new(sync.WaitGroup) defer func() { log.Info("all checksum ended") wg.Wait() outCh <- struct{}{} close(outCh) }() - wg := new(sync.WaitGroup) for { select { case <-ctx.Done(): errCh <- ctx.Err() case tbl, ok := <-tableStream: if !ok { - wg.Wait() return } wg.Add(1) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 60bd4073c..41c6a7a9f 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -53,7 +53,7 @@ func (manager *brContextManager) Enter(ctx context.Context, tables []CreatedTabl placementRuleTables := make([]*model.TableInfo, 0, len(tables)) for _, tbl := range tables { - if manager.hasTable[tbl.Table.ID] { + if !manager.hasTable[tbl.Table.ID] { placementRuleTables = append(placementRuleTables, tbl.Table) } manager.hasTable[tbl.Table.ID] = true @@ -70,6 +70,7 @@ func (manager *brContextManager) Leave(ctx context.Context, tables []CreatedTabl } splitPostWork(ctx, manager.client, placementRuleTables) + log.Info("restore table done", ZapTables(tables)) return nil } @@ -181,9 +182,9 @@ func (b *tikvSender) RestoreBatch(ctx context.Context, ranges []rtree.Range, rew return err } - log.Debug("send batch done", + log.Info("restore batch done", append( - DebugRanges(ranges), + ZapRanges(ranges), zap.Int("file count", len(files)), )..., ) diff --git a/pkg/restore/util.go b/pkg/restore/util.go index 3e831cc13..5970c0add 100644 --- a/pkg/restore/util.go +++ b/pkg/restore/util.go @@ -540,8 +540,8 @@ func waitForRemoveRejectStores( return false } -// DebugTables make zap field of table for debuging, including table names. -func DebugTables(tables []CreatedTable) zapcore.Field { +// ZapTables make zap field of table for debuging, including table names. +func ZapTables(tables []CreatedTable) zapcore.Field { tableNames := make([]string, 0, len(tables)) for _, t := range tables { tableNames = append(tableNames, fmt.Sprintf("%s.%s", t.OldTable.Db.Name, t.OldTable.Info.Name)) @@ -549,8 +549,8 @@ func DebugTables(tables []CreatedTable) zapcore.Field { return zap.Strings("tables", tableNames) } -// DebugRanges make zap fields for debuging, which contains kv, size and count of ranges. -func DebugRanges(ranges []rtree.Range) []zapcore.Field { +// ZapRanges make zap fields for debuging, which contains kv, size and count of ranges. +func ZapRanges(ranges []rtree.Range) []zapcore.Field { totalKV := uint64(0) totalSize := uint64(0) for _, r := range ranges { diff --git a/tests/br_db_online_newkv/run.sh b/tests/br_db_online_newkv/run.sh index d8c3f15ff..528fed852 100755 --- a/tests/br_db_online_newkv/run.sh +++ b/tests/br_db_online_newkv/run.sh @@ -66,6 +66,7 @@ sleep 5 echo "restore start..." run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --online +# TODO we should check whether the restore RPCs are send to the new TiKV. table_count=$(run_sql "use $DB; show tables;" | grep "Tables_in" | wc -l) if [ "$table_count" -ne "2" ];then echo "TEST: [$TEST_NAME] failed!" From fe59bc189d7dfca8d95ab80c669e0866f3207503 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 12 Jun 2020 13:18:20 +0800 Subject: [PATCH 45/52] restore: merge joiner of sendWorker into messagebox ... 
and, some small changes: - don't send sending request if here is one. - the method of how a batcher is send move to log level debug --- pkg/restore/batcher.go | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index c0dd702ed..e9a96d013 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -25,6 +25,8 @@ const ( SendUntilLessThanBatch SendType = iota // SendAll will make the batcher send all pending ranges. SendAll + // SendAllThenClose will make the batcher send all pending ranges and then close itself. + SendAllThenClose ) // Batcher collects ranges to restore and send batching split/ingest request. @@ -35,8 +37,6 @@ type Batcher struct { // autoCommitJoiner is for joining the background batch sender. autoCommitJoiner chan<- struct{} - // workerJoiner is also for joining the background batch sender. - workerJoiner chan<- struct{} // workerIsDone is for waiting for worker done: that is, after we send a // signal to workerJoiner, we must give it enough time to get things done. // Then, it should notify us by this waitgroup. @@ -70,7 +70,6 @@ func NewBatcher( errCh chan<- error, ) (*Batcher, <-chan CreatedTable) { output := make(chan CreatedTable, defaultBatcherOutputChannelSize) - workerJoiner := make(chan struct{}) sendChan := make(chan SendType, 2) b := &Batcher{ rewriteRules: EmptyRewriteRule(), @@ -79,13 +78,12 @@ func NewBatcher( sender: sender, manager: manager, sendCh: sendChan, - workerJoiner: workerJoiner, cachedTablesMu: new(sync.Mutex), everythingIsDone: new(sync.WaitGroup), batchSizeThreshold: 1, } b.everythingIsDone.Add(1) - go b.sendWorker(ctx, sendChan, workerJoiner) + go b.sendWorker(ctx, sendChan) return b, output } @@ -109,8 +107,7 @@ func (b *Batcher) DisableAutoCommit() { } func (b *Batcher) waitUntilSendDone() { - b.workerJoiner <- struct{}{} - close(b.workerJoiner) + b.sendCh <- SendAllThenClose b.everythingIsDone.Wait() } @@ -126,7 +123,7 @@ func (b *Batcher) joinWorker() { } // sendWorker is the 'worker' that send all ranges to TiKV. -func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType, joiner <-chan struct{}) { +func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType) { sendUntil := func(lessOrEqual int) { for b.Len() > lessOrEqual { tbls, err := b.Send(ctx) @@ -140,16 +137,13 @@ func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType, joiner < } } - for { - select { - case sendType := <-send: - switch sendType { - case SendUntilLessThanBatch: - sendUntil(b.batchSizeThreshold) - case SendAll: - sendUntil(0) - } - case <-joiner: + for sendType := range send { + switch sendType { + case SendUntilLessThanBatch: + sendUntil(b.batchSizeThreshold) + case SendAll: + sendUntil(0) + case SendAllThenClose: sendUntil(0) b.everythingIsDone.Done() return @@ -170,7 +164,7 @@ func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, return case <-tick.C: if b.Len() > 0 { - log.Info("sending batch because time limit exceed", zap.Int("size", b.Len())) + log.Debug("sending batch because time limit exceed", zap.Int("size", b.Len())) b.asyncSend(SendAll) } } @@ -178,7 +172,10 @@ func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, } func (b *Batcher) asyncSend(t SendType) { - b.sendCh <- t + // add a check here so we won't replica sending. 
+ if len(b.sendCh) == 0 { + b.sendCh <- t + } } type drainResult struct { @@ -285,7 +282,7 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { func (b *Batcher) sendIfFull() { if b.Len() >= b.batchSizeThreshold { - log.Info("sending batch because batcher is full", zap.Int("size", b.Len())) + log.Debug("sending batch because batcher is full", zap.Int("size", b.Len())) b.asyncSend(SendUntilLessThanBatch) } } From 4d67e54c3c17d1ea3db90366ebb33bca5e39fa81 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 12 Jun 2020 14:45:18 +0800 Subject: [PATCH 46/52] restore,task: run RemoveRestoreLabels at restore post work --- pkg/restore/pipeline_items.go | 5 ----- pkg/task/restore.go | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 41c6a7a9f..1bd7c544b 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -80,11 +80,6 @@ func splitPostWork(ctx context.Context, client *Client, tables []*model.TableInf log.Warn("reset placement rules failed", zap.Error(err)) return } - - err = client.ResetRestoreLabels(ctx) - if err != nil { - log.Warn("reset store labels failed", zap.Error(err)) - } } func splitPrepareWork(ctx context.Context, client *Client, tables []*model.TableInfo) error { diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 4ef0afc39..9b9c2bafa 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -430,6 +430,7 @@ func removePDLeaderScheduler(ctx context.Context, mgr *conn.Mgr, existSchedulers } // restorePostWork executes some post work after restore. +// TODO: aggregate all lifetime manage methods into batcher's context manager field. func restorePostWork( ctx context.Context, client *restore.Client, mgr *conn.Mgr, clusterCfg clusterConfig, ) { @@ -467,6 +468,9 @@ func restorePostWork( if err := mgr.UpdatePDScheduleConfig(ctx, scheduleLimitCfg); err != nil { log.Warn("fail to update PD schedule config") } + if err := client.ResetRestoreLabels(ctx); err != nil { + log.Warn("reset store labels failed", zap.Error(err)) + } } func addPDLeaderScheduler(ctx context.Context, mgr *conn.Mgr, removedSchedulers []string) error { From dc1d293515b279658056727bce8b4ce6e12c30d0 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 12 Jun 2020 15:44:51 +0800 Subject: [PATCH 47/52] task: adapt the remove-tiflash flag --- pkg/restore/pipeline_items.go | 24 ++++++++++++++++-------- pkg/task/restore.go | 20 ++++++++++++-------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 1bd7c544b..8cc830d6b 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -141,15 +141,23 @@ type tikvSender struct { } // NewTiKVSender make a sender that send restore requests to TiKV. -func NewTiKVSender(ctx context.Context, cli *Client, updateCh glue.Progress) (BatchSender, error) { - tiflashStores, err := conn.GetAllTiKVStores(ctx, cli.GetPDClient(), conn.TiFlashOnly) - if err != nil { - log.Error("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) - return nil, err - } +func NewTiKVSender( + ctx context.Context, + cli *Client, + updateCh glue.Progress, + // TODO remove this field after we support TiFlash. 
+ removeTiFlash bool, +) (BatchSender, error) { rejectStoreMap := make(map[uint64]bool) - for _, store := range tiflashStores { - rejectStoreMap[store.GetId()] = true + if removeTiFlash { + tiflashStores, err := conn.GetAllTiKVStores(ctx, cli.GetPDClient(), conn.TiFlashOnly) + if err != nil { + log.Error("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) + return nil, err + } + for _, store := range tiflashStores { + rejectStoreMap[store.GetId()] = true + } } return &tikvSender{ diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 9b9c2bafa..b6c03b10a 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -242,7 +242,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf int64(rangeSize+len(files)+len(tables)), !cfg.LogProgress) defer updateCh.Close() - sender, err := restore.NewTiKVSender(ctx, client, updateCh) + sender, err := restore.NewTiKVSender(ctx, client, updateCh, cfg.RemoveTiFlash) if err != nil { return err } @@ -250,7 +250,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(batchSize) batcher.EnableAutoCommit(ctx, time.Second) - go restoreTableStream(ctx, rangeStream, placementRules, client, batcher, errCh) + go restoreTableStream(ctx, rangeStream, cfg.RemoveTiFlash, placementRules, client, batcher, errCh) var finish <-chan struct{} // Checksum @@ -556,6 +556,8 @@ func enableTiDBConfig() { func restoreTableStream( ctx context.Context, inputCh <-chan restore.TableWithRange, + // TODO: remove this field and rules field after we support TiFlash + removeTiFlashReplica bool, rules []placement.Rule, client *restore.Client, batcher *restore.Batcher, @@ -584,13 +586,15 @@ func restoreTableStream( if !ok { return } - tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) - if err != nil { - log.Error("failed on remove TiFlash replicas", zap.Error(err)) - errCh <- err - return + if removeTiFlashReplica { + tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) + if err != nil { + log.Error("failed on remove TiFlash replicas", zap.Error(err)) + errCh <- err + return + } + t.OldTable.TiFlashReplicas = tiFlashRep } - t.OldTable.TiFlashReplicas = tiFlashRep oldTables = append(oldTables, t.OldTable) batcher.Add(t) From 5ac8cfe61ddeebb5921951d4ccf68bd0ac717f10 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Fri, 12 Jun 2020 18:14:27 +0800 Subject: [PATCH 48/52] restore,task: fetch new placement rules each time --- pkg/restore/client.go | 1 + pkg/task/restore.go | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index f6e68c79a..5991a158e 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -452,6 +452,7 @@ func makeTiFlashOfTableRecord(table *utils.Table, replica int) (*backup.Schema, func (rc *Client) RemoveTiFlashOfTable(table CreatedTable, rule []placement.Rule) (int, error) { if rule := utils.SearchPlacementRule(table.Table.ID, rule, placement.Learner); rule != nil { if rule.Count > 0 { + log.Info("remove TiFlash of table", zap.Int64("table ID", table.Table.ID), zap.Int("count", rule.Count)) err := multierr.Combine( rc.db.AlterTiflashReplica(rc.ctx, table.OldTable, 0), rc.removeTiFlashOf(table.OldTable, rule.Count), diff --git a/pkg/task/restore.go b/pkg/task/restore.go index b6c03b10a..f01769be7 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -11,7 +11,6 
@@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/log" - "github.com/pingcap/pd/v4/server/schedule/placement" "github.com/pingcap/tidb/config" "github.com/spf13/pflag" "go.uber.org/multierr" @@ -196,11 +195,6 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // don't return immediately, wait all pipeline done. } - placementRules, err := client.GetPlacementRules(cfg.PD) - if err != nil { - return err - } - tableFileMap := restore.MapTableToFiles(files) log.Debug("mapped table to files", zap.Any("result map", tableFileMap)) @@ -250,7 +244,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, manager, errCh) batcher.SetThreshold(batchSize) batcher.EnableAutoCommit(ctx, time.Second) - go restoreTableStream(ctx, rangeStream, cfg.RemoveTiFlash, placementRules, client, batcher, errCh) + go restoreTableStream(ctx, rangeStream, cfg.RemoveTiFlash, cfg.PD, client, batcher, errCh) var finish <-chan struct{} // Checksum @@ -558,7 +552,7 @@ func restoreTableStream( inputCh <-chan restore.TableWithRange, // TODO: remove this field and rules field after we support TiFlash removeTiFlashReplica bool, - rules []placement.Rule, + pdAddr []string, client *restore.Client, batcher *restore.Batcher, errCh chan<- error, @@ -587,6 +581,13 @@ func restoreTableStream( return } if removeTiFlashReplica { + rules, err := client.GetPlacementRules(pdAddr) + if err != nil { + errCh <- err + return + } + log.Debug("get rules", zap.Any("rules", rules)) + log.Debug("try to remove tiflash of table", zap.Stringer("table name", t.Table.Name)) tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules) if err != nil { log.Error("failed on remove TiFlash replicas", zap.Error(err)) From 4cbbff09b0e93f12c0e55e353d9d39fe252d2739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 15 Jun 2020 10:23:02 +0800 Subject: [PATCH 49/52] Apply suggestions from code review Co-authored-by: kennytm --- pkg/restore/batcher.go | 20 ++++++++++---------- pkg/restore/client.go | 4 ++-- pkg/restore/pipeline_items.go | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index e9a96d013..1585d973a 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -15,7 +15,7 @@ import ( ) // SendType is the 'type' of a send. -// when we make a 'send' command to worker, we may want to flush all penging ranges(when auto commit enabled), +// when we make a 'send' command to worker, we may want to flush all pending ranges (when auto commit enabled), // or, we just want to clean overflowing ranges(when just adding a table to batcher). type SendType int @@ -37,10 +37,10 @@ type Batcher struct { // autoCommitJoiner is for joining the background batch sender. autoCommitJoiner chan<- struct{} - // workerIsDone is for waiting for worker done: that is, after we send a - // signal to workerJoiner, we must give it enough time to get things done. + // everythingIsDone is for waiting for worker done: that is, after we send a + // signal to autoCommitJoiner, we must give it enough time to get things done. // Then, it should notify us by this waitgroup. - // Use waitgroup instead of a trivial channel for farther extension. + // Use waitgroup instead of a trivial channel for further extension. 
everythingIsDone *sync.WaitGroup // sendErr is for output error information. sendErr chan<- error @@ -87,7 +87,7 @@ func NewBatcher( return b, output } -// EnableAutoCommit enables the batcher commit batch periodicity even batcher size isn't big enough. +// EnableAutoCommit enables the batcher commit batch periodically even batcher size isn't big enough. // we make this function for disable AutoCommit in some case. func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { if b.autoCommitJoiner != nil { @@ -115,7 +115,7 @@ func (b *Batcher) waitUntilSendDone() { // return immediately when auto commit disabled. func (b *Batcher) joinWorker() { if b.autoCommitJoiner != nil { - log.Debug("gracefully stoping worker goroutine") + log.Debug("gracefully stopping worker goroutine") b.autoCommitJoiner <- struct{}{} close(b.autoCommitJoiner) log.Debug("gracefully stopped worker goroutine") @@ -219,8 +219,8 @@ func (b *Batcher) drainRanges() drainResult { var drained []rtree.Range drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:] log.Debug("draining partial table to batch", + zap.Stringer("db", thisTable.OldTable.Db.Name), zap.Stringer("table", thisTable.Table.Name), - zap.Stringer("database", thisTable.OldTable.Db.Name), zap.Int("size", thisTableLen), zap.Int("drained", drainSize), ) @@ -237,8 +237,8 @@ func (b *Batcher) drainRanges() drainResult { // clear the table length. b.cachedTables[offset].Range = []rtree.Range{} log.Debug("draining table to batch", + zap.Stringer("db", thisTable.OldTable.Db.Name), zap.Stringer("table", thisTable.Table.Name), - zap.Stringer("database", thisTable.OldTable.Db.Name), zap.Int("size", thisTableLen), ) } @@ -291,8 +291,8 @@ func (b *Batcher) sendIfFull() { func (b *Batcher) Add(tbs TableWithRange) { b.cachedTablesMu.Lock() log.Debug("adding table to batch", + zap.Stringer("db", tbs.OldTable.Db.Name), zap.Stringer("table", tbs.Table.Name), - zap.Stringer("database", tbs.OldTable.Db.Name), zap.Int64("old id", tbs.OldTable.Info.ID), zap.Int64("new id", tbs.Table.ID), zap.Int("table size", len(tbs.Range)), @@ -308,7 +308,7 @@ func (b *Batcher) Add(tbs TableWithRange) { // Close closes the batcher, sending all pending requests, close updateCh. func (b *Batcher) Close() { - log.Info("sending batch lastly on close.", zap.Int("size", b.Len())) + log.Info("sending batch lastly on close", zap.Int("size", b.Len())) b.DisableAutoCommit() b.waitUntilSendDone() close(b.outCh) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index 5991a158e..a94311279 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -397,8 +397,8 @@ func (rc *Client) GoCreateTables( if err != nil { log.Error("create table failed", zap.Error(err), - zap.Stringer("table", t.Info.Name), - zap.Stringer("database", t.Db.Name)) + zap.Stringer("db", t.Db.Name), + zap.Stringer("table", t.Info.Name)) return err } log.Debug("table created and send to next", diff --git a/pkg/restore/pipeline_items.go b/pkg/restore/pipeline_items.go index 8cc830d6b..d99f5bd8e 100644 --- a/pkg/restore/pipeline_items.go +++ b/pkg/restore/pipeline_items.go @@ -121,7 +121,7 @@ func Exhaust(ec <-chan error) []error { out = append(out, err) default: // errCh will NEVER be closed(ya see, it has multi sender-part), - // so we just consume the current backlog of this cannel, then return. + // so we just consume the current backlog of this channel, then return. 
return out } } @@ -152,7 +152,7 @@ func NewTiKVSender( if removeTiFlash { tiflashStores, err := conn.GetAllTiKVStores(ctx, cli.GetPDClient(), conn.TiFlashOnly) if err != nil { - log.Error("failed to get and remove TiFlash replicas", zap.Error(errors.Trace(err))) + log.Error("failed to get and remove TiFlash replicas", zap.Error(err)) return nil, err } for _, store := range tiflashStores { From 460331f0d80d7869767422a8783446498557bf61 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Mon, 15 Jun 2020 11:13:47 +0800 Subject: [PATCH 50/52] restore,task: run Leave always, and modify some log level --- pkg/restore/batcher.go | 53 ++++++++++++++++++++++++++++++++---------- pkg/task/restore.go | 9 +++---- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/pkg/restore/batcher.go b/pkg/restore/batcher.go index 1585d973a..f8b7e18dc 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -60,7 +60,9 @@ func (b *Batcher) Len() int { return int(atomic.LoadInt32(&b.size)) } -// NewBatcher creates a new batcher by client and updateCh. +// NewBatcher creates a new batcher by a sender and a context manager. +// the former defines how the 'restore' a batch(i.e. send, or 'push down' the task to where). +// the context manager defines the 'lifetime' of restoring tables(i.e. how to enter 'restore' mode, and how to exit). // this batcher will work background, send batches per second, or batch size reaches limit. // and it will emit full-restored tables to the output channel returned. func NewBatcher( @@ -91,8 +93,9 @@ func NewBatcher( // we make this function for disable AutoCommit in some case. func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { if b.autoCommitJoiner != nil { - log.Warn("enable auto commit on a batcher that auto commit is enabled, nothing will happen") - log.Info("if desire(e.g. change the peroid of auto commit), please disable auto commit firstly") + // IMO, making two auto commit goroutine wouldn't be a good idea. + // If desire(e.g. change the peroid of auto commit), please disable auto commit firstly. + log.L().DPanic("enabling auto commit on a batcher that auto commit has been enabled, which isn't allowed") } joiner := make(chan struct{}) go b.autoCommitWorker(ctx, joiner, delay) @@ -196,6 +199,24 @@ func newDrainResult() drainResult { } } +// drainRanges 'drains' ranges from current tables. +// for example, let a '-' character be a range, assume we have: +// |---|-----|-------| +// |t1 |t2 |t3 | +// after we run drainRanges() with batchSizeThreshold = 6, let '*' be the ranges will be sent this batch : +// |***|***--|-------| +// |t1 |t2 |-------| +// +// drainRanges() will return: +// TablesToSend: [t1, t2] (so we can make them enter restore mode) +// BlankTableAfterSend: [t1] (so we can make them leave restore mode after restoring this batch) +// RewriteRules: rewrite rules for [t1, t2] (so we can restore them) +// Ranges: those stared ranges (so we can restore them) +// +// then, it will leaving the batcher's cachedTables like this: +// |--|-------| +// |t2|t3 | +// as you can see, all restored ranges would be removed. 
func (b *Batcher) drainRanges() drainResult { result := newDrainResult() @@ -255,6 +276,23 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { tbs := drainResult.TablesToSend ranges := drainResult.Ranges + defer func() { + if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { + log.Error("encountering error when leaving recover mode, we can go on but some regions may stick on restore mode", + append( + ZapRanges(ranges), + ZapTables(tbs), + zap.Error(err))..., + ) + } + if len(drainResult.BlankTablesAfterSend) > 0 { + log.Debug("table fully restored", + ZapTables(drainResult.BlankTablesAfterSend), + zap.Int("ranges", len(ranges)), + ) + } + }() + log.Info("restore batch start", append( ZapRanges(ranges), @@ -268,15 +306,6 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { if err := b.sender.RestoreBatch(ctx, ranges, drainResult.RewriteRules); err != nil { return nil, err } - if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { - return nil, err - } - if len(drainResult.BlankTablesAfterSend) > 0 { - log.Debug("table fully restored", - ZapTables(drainResult.BlankTablesAfterSend), - zap.Int("ranges", len(ranges)), - ) - } return drainResult.BlankTablesAfterSend, nil } diff --git a/pkg/task/restore.go b/pkg/task/restore.go index f01769be7..684665135 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -272,7 +272,8 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } -// dropToBlackhole drop all incoming tables into black hole. +// dropToBlackhole drop all incoming tables into black hole, +// i.e. don't execute checksum, just increase the process anyhow. func dropToBlackhole( ctx context.Context, tableStream <-chan restore.CreatedTable, @@ -289,15 +290,11 @@ func dropToBlackhole( case <-ctx.Done(): errCh <- ctx.Err() return - case tbl, ok := <-tableStream: + case _, ok := <-tableStream: if !ok { return } updateCh.Inc() - log.Info("skipping checksum of table because user config", - zap.Stringer("database", tbl.OldTable.Db.Name), - zap.Stringer("table", tbl.Table.Name), - ) } } }() From 0ee5223f2783e6e14a41679aed2fcb10fd69241e Mon Sep 17 00:00:00 2001 From: yujuncen Date: Mon, 15 Jun 2020 11:51:12 +0800 Subject: [PATCH 51/52] restore: fix a bug that may cause checksum time incorrect --- pkg/restore/client.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/restore/client.go b/pkg/restore/client.go index a94311279..2f315e1f8 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -739,20 +739,17 @@ func (rc *Client) GoValidateChecksum( errCh chan<- error, updateCh glue.Progress, ) <-chan struct{} { - start := time.Now() - defer func() { - elapsed := time.Since(start) - summary.CollectDuration("restore checksum", elapsed) - }() - log.Info("Start to validate checksum") outCh := make(chan struct{}, 1) workers := utils.NewWorkerPool(defaultChecksumConcurrency, "RestoreChecksum") go func() { + start := time.Now() wg := new(sync.WaitGroup) defer func() { log.Info("all checksum ended") wg.Wait() + elapsed := time.Since(start) + summary.CollectDuration("restore checksum", elapsed) outCh <- struct{}{} close(outCh) }() From 77ab77f8cbd9114600c389d2e6a63b1a46c84249 Mon Sep 17 00:00:00 2001 From: yujuncen Date: Mon, 15 Jun 2020 17:30:09 +0800 Subject: [PATCH 52/52] restore: don't Leave if never Enter --- pkg/restore/batcher.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git 
a/pkg/restore/batcher.go b/pkg/restore/batcher.go index f8b7e18dc..1a4c1256d 100644 --- a/pkg/restore/batcher.go +++ b/pkg/restore/batcher.go @@ -276,6 +276,16 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { tbs := drainResult.TablesToSend ranges := drainResult.Ranges + log.Info("restore batch start", + append( + ZapRanges(ranges), + ZapTables(tbs), + )..., + ) + + if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { + return nil, err + } defer func() { if err := b.manager.Leave(ctx, drainResult.BlankTablesAfterSend); err != nil { log.Error("encountering error when leaving recover mode, we can go on but some regions may stick on restore mode", @@ -293,16 +303,6 @@ func (b *Batcher) Send(ctx context.Context) ([]CreatedTable, error) { } }() - log.Info("restore batch start", - append( - ZapRanges(ranges), - ZapTables(tbs), - )..., - ) - - if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { - return nil, err - } if err := b.sender.RestoreBatch(ctx, ranges, drainResult.RewriteRules); err != nil { return nil, err }
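
The final hunk above settles the ordering inside Batcher.Send: the "restore batch start" log and manager.Enter now run first, and the deferred Leave introduced in patch 50 is only registered once Enter has succeeded. A failed Enter therefore can no longer trigger a Leave for tables that were never entered, while a failed RestoreBatch still gets its Leave. Below is a minimal, self-contained sketch of that shape; ContextManager, Range, CreatedTable, restoreBatch and nopManager are simplified stand-ins for BR's real types and sender, not its actual API.

package main

import (
	"context"
	"fmt"
)

// Placeholder types for this sketch; BR's real CreatedTable and rtree.Range
// carry much more information.
type CreatedTable struct{ Name string }
type Range struct{ Start, End string }

// ContextManager is a cut-down view of the batcher's manager: Enter puts
// tables into restore mode, Leave takes them out again.
type ContextManager interface {
	Enter(ctx context.Context, tables []CreatedTable) error
	Leave(ctx context.Context, tables []CreatedTable) error
}

// send mirrors the shape Batcher.Send ends up with after the last patch:
// Enter runs first, and the deferred Leave is registered only after Enter
// succeeded, so Leave never fires for tables that were never entered, yet it
// still runs when the restore itself fails.
func send(
	ctx context.Context,
	mgr ContextManager,
	toSend, blankAfterSend []CreatedTable,
	ranges []Range,
	restoreBatch func(context.Context, []Range) error,
) ([]CreatedTable, error) {
	fmt.Printf("restore batch start: %d ranges, %d tables\n", len(ranges), len(toSend))

	if err := mgr.Enter(ctx, toSend); err != nil {
		return nil, err // nothing was entered, so there is nothing to leave
	}
	defer func() {
		if err := mgr.Leave(ctx, blankAfterSend); err != nil {
			// we can go on, but some regions may stay in restore mode
			fmt.Println("error when leaving restore mode:", err)
		}
	}()

	if err := restoreBatch(ctx, ranges); err != nil {
		return nil, err // the deferred Leave above still runs
	}
	return blankAfterSend, nil
}

type nopManager struct{}

func (nopManager) Enter(context.Context, []CreatedTable) error { return nil }
func (nopManager) Leave(context.Context, []CreatedTable) error { return nil }

func main() {
	tbls := []CreatedTable{{Name: "t1"}}
	_, err := send(context.Background(), nopManager{}, tbls, tbls,
		[]Range{{Start: "a", End: "b"}},
		func(context.Context, []Range) error { return nil },
	)
	fmt.Println("send finished, err =", err)
}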
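
Patch 51 fixes the checksum duration by moving the timer into the background goroutine: a time.Since recorded by a defer in the function that merely spawns the goroutine measures only the spawn, not the work. The sketch below illustrates the fixed pattern in a self-contained way; goValidate, jobs and collect are made-up names standing in for GoValidateChecksum, the per-table checksum workers and summary.CollectDuration.

package main

import (
	"fmt"
	"sync"
	"time"
)

// goValidate mimics the fixed GoValidateChecksum: the timer starts inside the
// background goroutine, and the duration is collected only after every
// checksum worker has finished (wg.Wait runs in the deferred func before the
// duration is recorded). Measuring with a defer in the spawning function, as
// before the fix, would only time the goroutine launch.
func goValidate(jobs []time.Duration, collect func(name string, d time.Duration)) <-chan struct{} {
	done := make(chan struct{}, 1)
	go func() {
		start := time.Now()
		wg := new(sync.WaitGroup)
		defer func() {
			wg.Wait()
			collect("restore checksum", time.Since(start))
			done <- struct{}{}
			close(done)
		}()
		for _, d := range jobs {
			wg.Add(1)
			go func(d time.Duration) {
				defer wg.Done()
				time.Sleep(d) // stands in for checksumming one table
			}(d)
		}
	}()
	return done
}

func main() {
	done := goValidate(
		[]time.Duration{50 * time.Millisecond, 80 * time.Millisecond},
		func(name string, d time.Duration) { fmt.Println(name, "took", d) },
	)
	<-done
}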
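
The drainRanges doc comment added in patch 50 already gives a good picture of the batching rule: take whole tables until the threshold is reached and split at most the table that crosses it. As a companion to that ASCII diagram, here is a toy drain over plain integer range counts; drain, pending and threshold are invented for this sketch and are not BR functions. It reproduces the diagram's 3/5/7-range example with a threshold of 6.

package main

import "fmt"

// drain is a toy version of drainRanges' bookkeeping: pending[i] is the
// number of not-yet-restored ranges of table i, and we take whole tables
// until `threshold` ranges are collected, splitting at most one table.
// It returns how many ranges were taken per table and what is left behind.
func drain(pending []int, threshold int) (taken, rest []int) {
	taken = make([]int, len(pending))
	rest = append([]int(nil), pending...)
	budget := threshold
	for i, n := range rest {
		if budget == 0 {
			break
		}
		if n <= budget {
			taken[i], rest[i], budget = n, 0, budget-n // whole table drained
			continue
		}
		taken[i], rest[i], budget = budget, n-budget, 0 // partially drained
	}
	return taken, rest
}

func main() {
	// Tables with 3, 5 and 7 pending ranges and a batch threshold of 6:
	// t1 is fully drained, t2 keeps 2 ranges, t3 is untouched.
	taken, rest := drain([]int{3, 5, 7}, 6)
	fmt.Println("taken:", taken, "rest:", rest) // taken: [3 3 0] rest: [0 2 7]
}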