From fec35edcbaf999a69f344fab66dc27f527fb12ee Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 12 Jun 2019 17:13:25 +0800 Subject: [PATCH 01/24] *: use `reader` to reader binlog from upstream; do not check gap anymore --- pkg/streamer/reader.go | 6 - relay/relay.go | 282 +++++------------------------------------ relay/util.go | 23 ---- 3 files changed, 35 insertions(+), 276 deletions(-) diff --git a/pkg/streamer/reader.go b/pkg/streamer/reader.go index f64151b543..2cc352da43 100644 --- a/pkg/streamer/reader.go +++ b/pkg/streamer/reader.go @@ -263,12 +263,6 @@ func (r *BinlogReader) parseFile( onEventFunc := func(e *replication.BinlogEvent) error { log.Debugf("[streamer] read event %+v", e.Header) - if e.Header.Flags&0x0040 != 0 { - // now LOG_EVENT_RELAY_LOG_F is only used for events which used to fill the gap in relay log file when switching the master server - log.Debugf("skip event %+v created by relay writer", e.Header) - return nil - } - r.latestServerID = e.Header.ServerID // record server_id switch e.Header.EventType { diff --git a/relay/relay.go b/relay/relay.go index 55cb56dd03..ebc7aaea5f 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -21,10 +21,8 @@ import ( "fmt" "hash/crc32" "io" - "math" "os" "path" - "path/filepath" "strings" "sync" "time" @@ -38,17 +36,16 @@ import ( "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/unit" "github.com/pingcap/dm/pkg/binlog" - binlogreader "github.com/pingcap/dm/pkg/binlog/reader" fr "github.com/pingcap/dm/pkg/func-rollback" "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/log" pkgstreamer "github.com/pingcap/dm/pkg/streamer" "github.com/pingcap/dm/pkg/utils" + "github.com/pingcap/dm/relay/common" + "github.com/pingcap/dm/relay/reader" ) var ( - // ErrBinlogPosGreaterThanFileSize represents binlog pos greater than file size - ErrBinlogPosGreaterThanFileSize = errors.New("the specific position is greater than the local binlog file size") // ErrNoIncompleteEventFound represents no incomplete event found in relay log file ErrNoIncompleteEventFound = errors.New("no incomplete event found in relay log file") // used to fill RelayLogInfo @@ -107,9 +104,7 @@ type Relay struct { db *sql.DB cfg *Config syncerCfg replication.BinlogSyncerConfig - reader binlogreader.Reader - gapSyncerCfg replication.BinlogSyncerConfig meta Meta lastSlaveConnectionID uint32 fd *os.File @@ -139,12 +134,6 @@ func NewRealRelay(cfg *Config) Process { VerifyChecksum: true, } - // use 2**32 -1 - ServerID as new ServerID to fill the gap in relay log file - gapSyncerCfg := syncerCfg - gapSyncerCfg.ServerID = math.MaxUint32 - syncerCfg.ServerID - // for gap streamer, we always use rawMode to avoid `invalid table id, no corresponding table map event` error event starting sync from a RowsEvent - gapSyncerCfg.RawModeEnabled = true - if !cfg.EnableGTID { // for rawMode(true), we only parse FormatDescriptionEvent and RotateEvent // if not need to support GTID mode, we can enable rawMode @@ -158,10 +147,9 @@ func NewRealRelay(cfg *Config) Process { } return &Relay{ - cfg: cfg, - syncerCfg: syncerCfg, - gapSyncerCfg: gapSyncerCfg, - meta: NewLocalMeta(cfg.Flavor, cfg.RelayDir), + cfg: cfg, + syncerCfg: syncerCfg, + meta: NewLocalMeta(cfg.Flavor, cfg.RelayDir), } } @@ -201,8 +189,6 @@ func (r *Relay) Init() (err error) { // Process implements the dm.Unit interface. func (r *Relay) Process(ctx context.Context, pr chan pb.ProcessResult) { - r.reader = binlogreader.NewTCPReader(r.syncerCfg) - errs := make([]*pb.ProcessError, 0, 1) err := r.process(ctx) if err != nil && errors.Cause(err) != replication.ErrSyncClosed { @@ -265,40 +251,25 @@ func (r *Relay) process(parentCtx context.Context) error { r.updateMetricsRelaySubDirIndex() } - err = r.setUpReader() + // TODO: do recover before reading from upstream. + reader2, err := r.setUpReader() if err != nil { return errors.Trace(err) } + defer func() { + err = reader2.Close() + if err != nil { + log.Errorf("[relay] close binlog event reader error %v", err) + } + }() var ( - _, lastPos = r.meta.Pos() - _, lastGTID = r.meta.GTID() - tryReSync = true // used to handle master-slave switch - addRelayFlag = false // whether needing to add a LOG_EVENT_RELAY_LOG_F flag to the event - - // fill gap steps: - // 1. record the pos after the gap - // 2. create a special streamer to fill the gap - // 3. catchup pos after the gap - // 4. close the special streamer - gapReader binlogreader.Reader - gapSyncStartPos *mysql.Position // the pos of the event starting to fill the gap - gapSyncEndPos *mysql.Position // the pos of the event after the gap - eventFormat *replication.FormatDescriptionEvent // latest FormatDescriptionEvent, used when re-calculate checksum + _, lastPos = r.meta.Pos() + _, lastGTID = r.meta.GTID() + tryReSync = true // used to handle master-slave switch ) - closeGapSyncer := func() { - addRelayFlag = false // reset to false when closing the gap syncer - if gapReader != nil { - gapReader.Close() - gapReader = nil - } - gapSyncStartPos = nil - gapSyncEndPos = nil - } - defer func() { - closeGapSyncer() if r.fd != nil { r.fd.Close() } @@ -307,35 +278,10 @@ func (r *Relay) process(parentCtx context.Context) error { go r.doIntervalOps(parentCtx) for { - if gapReader == nil && gapSyncStartPos != nil && gapSyncEndPos != nil { - gapReader = binlogreader.NewTCPReader(r.gapSyncerCfg) - err = gapReader.StartSyncByPos(*gapSyncStartPos) - if err != nil { - return errors.Annotatef(err, "start to fill gap in relay log file from %v", gapSyncStartPos) - } - log.Infof("[relay] start to fill gap in relay log file from %v", gapSyncStartPos) - if r.cfg.EnableGTID { - _, currentGtidSet := r.meta.GTID() - if currentGtidSet.Equal(r.gSetWhenSwitch) { - addRelayFlag = true - log.Infof("[relay] binlog events in the gap for file %s already exists in previous sub directory, adding a LOG_EVENT_RELAY_LOG_F flag to them. current GTID set %s", r.fd.Name(), currentGtidSet) - } else if currentGtidSet.Contain(r.gSetWhenSwitch) { - // only after the first gap filled, currentGtidSet can be the superset of gSetWhenSwitch - log.Infof("[relay] binlog events in the gap for file %s not exists in previous sub directory, no need to add a LOG_EVENT_RELAY_LOG_F flag. current GTID set %s, previous sub directory end GTID set %s", r.fd.Name(), currentGtidSet, r.gSetWhenSwitch) - } else { - return errors.Errorf("currnet GTID set %s is not equal or superset of previous sub directory end GTID set %s", currentGtidSet, r.gSetWhenSwitch) - } - } - } - ctx, cancel := context.WithTimeout(parentCtx, eventTimeout) readTimer := time.Now() var e *replication.BinlogEvent - if gapReader != nil { - e, err = gapReader.GetEvent(ctx) - } else { - e, err = r.reader.GetEvent(ctx) - } + e, err = reader2.GetEvent(ctx) cancel() binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds()) @@ -352,38 +298,8 @@ func (r *Relay) process(parentCtx context.Context) error { // do nothing default: if utils.IsErrBinlogPurged(err) { - if gapReader != nil { - pos, err2 := tryFindGapStartPos(err, r.fd.Name()) - if err2 == nil { - log.Errorf("[relay] %s", err.Error()) // log the error - gapSyncEndPos2 := *gapSyncEndPos // save the endPos - closeGapSyncer() // close the previous gap streamer - _, err2 = r.fd.Seek(int64(pos), io.SeekStart) // seek to a valid pos - if err2 != nil { - return errors.Annotatef(err, "seek %s to pos %d", r.fd.Name(), pos) - } - err2 = r.fd.Truncate(int64(pos)) // truncate the incomplete event part - if err2 != nil { - return errors.Annotatef(err, "truncate %s to size %d", r.fd.Name(), pos) - } - log.Infof("[relay] truncate %s to size %d", r.fd.Name(), pos) - gapSyncStartPos = &mysql.Position{ - Name: lastPos.Name, - Pos: pos, - } - gapSyncEndPos = &gapSyncEndPos2 // reset the endPos - log.Infof("[relay] adjust gap streamer's start pos to %d", pos) - continue - } - return errors.Annotatef(err, "gap streamer") - } if tryReSync && r.cfg.EnableGTID && r.cfg.AutoFixGTID { - err = r.reSetUpReader(r.syncerCfg) - if err != nil { - return errors.Annotatef(err, "try auto switch with GTID") - } - tryReSync = false // do not support repeat try re-sync - continue + // TODO: try auto fix GTID } } binlogReadErrorCounter.Inc() @@ -398,7 +314,6 @@ func (r *Relay) process(parentCtx context.Context) error { switch ev := e.Event.(type) { case *replication.FormatDescriptionEvent: // FormatDescriptionEvent is the first event in binlog, we will close old one and create a new - eventFormat = ev // record FormatDescriptionEvent exist, err := r.handleFormatDescriptionEvent(lastPos.Name) if err != nil { return errors.Trace(err) @@ -426,7 +341,7 @@ func (r *Relay) process(parentCtx context.Context) error { // even for `BEGIN`, we still update pos / GTID lastPos.Pos = e.Header.LogPos lastGTID.Set(ev.GSet) // in order to call `ev.GSet`, can not combine QueryEvent and XIDEvent - isDDL := checkIsDDL(string(ev.Query), parser2) + isDDL := common.CheckIsDDL(string(ev.Query), parser2) if isDDL { needSavePos = true // need save pos for DDL } @@ -452,37 +367,6 @@ func (r *Relay) process(parentCtx context.Context) error { } } - if gapReader == nil { - gapDetected, fSize, err := r.detectGap(e) - if err != nil { - relayLogWriteErrorCounter.Inc() - return errors.Annotatef(err, "detect relay log file gap for event %+v", e.Header) - } - if gapDetected { - gapSyncStartPos = &mysql.Position{ - Name: lastPos.Name, - Pos: fSize, - } - gapSyncEndPos = &mysql.Position{ - Name: lastPos.Name, - Pos: e.Header.LogPos, - } - log.Infof("[relay] gap detected from %d to %d in %s, current event %+v", fSize, e.Header.LogPos-e.Header.EventSize, lastPos.Name, e.Header) - continue // skip this event after the gap, it will be wrote to the file when filling the gap - } - } else { - // why check gapSyncEndPos != nil? - if gapSyncEndPos != nil && e.Header.LogPos >= gapSyncEndPos.Pos { - // catch up, after write this event, gap will be filled - log.Infof("[relay] fill gap reaching the end pos %v, current event %+v", gapSyncEndPos.String(), e.Header) - closeGapSyncer() - } else if addRelayFlag { - // add LOG_EVENT_RELAY_LOG_F flag to events which used to fill the gap - // ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html - r.addFlagToEvent(e, 0x0040, eventFormat) - } - } - cmp, fSize, err := r.compareEventWithFileSize(e) if err != nil { relayLogWriteErrorCounter.Inc() @@ -785,120 +669,28 @@ func (r *Relay) checkFormatDescriptionEventExists(filename string) (exists bool, return false, nil } -func (r *Relay) setUpReader() error { - defer func() { - status := r.reader.Status() - log.Infof("[relay] set up binlog reader with status %s", status) - }() - - if r.cfg.EnableGTID { - return r.setUpReaderByGTID() - } - return r.setUpReaderByPos() -} - -func (r *Relay) setUpReaderByGTID() error { - uuid, gs := r.meta.GTID() - log.Infof("[relay] start sync for master(%s, %s) from GTID set %s", r.masterNode(), uuid, gs) - - err := r.reader.StartSyncByGTID(gs) - if err != nil { - log.Errorf("[relay] start sync in GTID mode from %s error %v", gs.String(), err) - return r.setUpReaderByPos() - } - - return nil -} - -func (r *Relay) setUpReaderByPos() error { - // if the first binlog not exists in local, we should fetch from the first position, whatever the specific position is. +// setUpReader setups the underlying reader used to read binlog events from the upstream master server. +func (r *Relay) setUpReader() (reader.Reader, error) { uuid, pos := r.meta.Pos() - log.Infof("[relay] start sync for master (%s, %s) from %s", r.masterNode(), uuid, pos.String()) - if pos.Name == "" { - // let mysql decides - return r.reader.StartSyncByPos(pos) - } - if stat, err := os.Stat(filepath.Join(r.meta.Dir(), pos.Name)); os.IsNotExist(err) { - log.Infof("[relay] should sync from %s:4 instead of %s:%d because the binlog file not exists in local before and should sync from the very beginning", pos.Name, pos.Name, pos.Pos) - pos.Pos = 4 - } else if err != nil { - return errors.Trace(err) - } else { - if stat.Size() > int64(pos.Pos) { - // it means binlog file already exists, and the local binlog file already contains the specific position - // so we can just fetch from the biggest position, that's the stat.Size() - // - // NOTE: is it possible the data from pos.Pos to stat.Size corrupt - log.Infof("[relay] the binlog file %s already contains position %d, so we should sync from %d", pos.Name, pos.Pos, stat.Size()) - pos.Pos = uint32(stat.Size()) - err := r.meta.Save(pos, nil) - if err != nil { - return errors.Trace(err) - } - } else if stat.Size() < int64(pos.Pos) { - // in such case, we should stop immediately and check - return errors.Annotatef(ErrBinlogPosGreaterThanFileSize, "%s size=%d, specific pos=%d", pos.Name, stat.Size(), pos.Pos) - } + _, gs := r.meta.GTID() + cfg := &reader.Config{ + SyncConfig: r.syncerCfg, + Pos: pos, + GTIDs: gs, + MasterID: r.masterNode(), + EnableGTID: r.cfg.EnableGTID, } - return r.reader.StartSyncByPos(pos) -} - -// reSyncBinlog re-tries sync binlog when master-slave switched -func (r *Relay) reSyncBinlog(cfg replication.BinlogSyncerConfig) error { - err := r.retrySyncGTIDs() + reader2 := reader.NewReader(cfg) + err := reader2.Start() if err != nil { - return errors.Trace(err) + // do not log the whole config to protect the password in `SyncConfig`. + // and other config items should already logged before or included in `err`. + return nil, errors.Annotatef(err, "start reader for UUID %s", uuid) } - return r.reSetUpReader(cfg) -} -// retrySyncGTIDs try to auto fix GTID set -// assume that reset master before switching to new master, and only the new master would write -// it's a weak function to try best to fix GTID set while switching master/slave -func (r *Relay) retrySyncGTIDs() error { - // TODO: now we don't implement quering GTID from MariaDB, implement it later - if r.cfg.Flavor != mysql.MySQLFlavor { - return nil - } - _, oldGTIDSet := r.meta.GTID() - log.Infof("[relay] start retry sync with old GTID %s", oldGTIDSet.String()) - - _, newGTIDSet, err := utils.GetMasterStatus(r.db, r.cfg.Flavor) - if err != nil { - return errors.Annotatef(err, "get master status") - } - log.Infof("[relay] new master GTID set %v", newGTIDSet) - - masterUUID, err := utils.GetServerUUID(r.db, r.cfg.Flavor) - if err != nil { - return errors.Annotatef(err, "get master UUID") - } - log.Infof("master UUID %s", masterUUID) - - oldGTIDSet.Replace(newGTIDSet, []interface{}{masterUUID}) - - // add sub relay dir for new master server - // save and flush meta for new master server - err = r.meta.AddDir(masterUUID, nil, oldGTIDSet) - if err != nil { - return errors.Annotatef(err, "add sub relay directory for master server %s", masterUUID) - } - - r.updateMetricsRelaySubDirIndex() - - return nil -} - -func (r *Relay) reSetUpReader(cfg replication.BinlogSyncerConfig) error { - if r.reader != nil { - err := r.reader.Close() - r.reader = nil - if err != nil { - return errors.Trace(err) - } - } - return r.setUpReader() + log.Infof("[relay] started underlying reader for UUID %s ", uuid) + return reader2, nil } func (r *Relay) masterNode() string { @@ -912,10 +704,6 @@ func (r *Relay) IsClosed() bool { // stopSync stops syncing, now it used by Close and Pause func (r *Relay) stopSync() { - if r.reader != nil { - r.reader.Close() - r.reader = nil - } if r.fd != nil { r.fd.Close() r.fd = nil diff --git a/relay/util.go b/relay/util.go index b4fdf9038a..d0bc465b53 100644 --- a/relay/util.go +++ b/relay/util.go @@ -17,34 +17,11 @@ import ( "strings" "github.com/pingcap/errors" - "github.com/pingcap/parser" - "github.com/pingcap/parser/ast" "github.com/siddontang/go-mysql/replication" - parserpkg "github.com/pingcap/dm/pkg/parser" "github.com/pingcap/dm/pkg/utils" ) -// checkIsDDL checks input SQL whether is a valid DDL statement -func checkIsDDL(sql string, p *parser.Parser) bool { - sql = utils.TrimCtrlChars(sql) - - // if parse error, treat it as not a DDL - stmts, err := parserpkg.Parse(p, sql, "", "") - if err != nil || len(stmts) == 0 { - return false - } - - stmt := stmts[0] - switch stmt.(type) { - case ast.DDLNode: - return true - default: - // some this like `BEGIN` - return false - } -} - // searchLastCompleteEventPos searches the last complete event (end) pos with an incomplete event at the end of the file. // NOTE: test this when we can generate various types of binlog events func searchLastCompleteEventPos(file string) (uint32, error) { From 3e6a3050689712412357e9590a52edd0c9aad797 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 12 Jun 2019 18:00:33 +0800 Subject: [PATCH 02/24] *: let `reader` handle more errors --- relay/reader/error.go | 10 ++++++ relay/reader/error_test.go | 23 ++++++++++--- relay/reader/reader.go | 23 +++++++++---- relay/reader/reader_test.go | 69 +++++++++++++++++++------------------ relay/relay.go | 17 +++++---- 5 files changed, 90 insertions(+), 52 deletions(-) diff --git a/relay/reader/error.go b/relay/reader/error.go index 39c7bc2dcb..40d38c715a 100644 --- a/relay/reader/error.go +++ b/relay/reader/error.go @@ -28,3 +28,13 @@ func isIgnorableError(err error) bool { } return false } + +// isRetryableError checks whether the error is retryable. +func isRetryableError(err error) bool { + err = errors.Cause(err) + switch err { + case context.DeadlineExceeded: + return true + } + return false +} diff --git a/relay/reader/error_test.go b/relay/reader/error_test.go index 6fd61bf194..095bc3a297 100644 --- a/relay/reader/error_test.go +++ b/relay/reader/error_test.go @@ -16,26 +16,39 @@ package reader import ( "context" - . "github.com/pingcap/check" + "github.com/pingcap/check" "github.com/pingcap/errors" ) var ( - _ = Suite(&testErrorSuite{}) + _ = check.Suite(&testErrorSuite{}) ) type testErrorSuite struct { } -func (t *testErrorSuite) TestIgnorable(c *C) { +func (t *testErrorSuite) TestIgnorable(c *check.C) { err := errors.New("custom error") - c.Assert(isIgnorableError(err), IsFalse) + c.Assert(isIgnorableError(err), check.IsFalse) cases := []error{ context.Canceled, errors.Annotate(context.Canceled, "annotated"), } for _, cs := range cases { - c.Assert(isIgnorableError(cs), IsTrue) + c.Assert(isIgnorableError(cs), check.IsTrue) + } +} + +func (t *testErrorSuite) TestRetryable(c *check.C) { + err := errors.New("custom error") + c.Assert(isRetryableError(err), check.IsFalse) + + cases := []error{ + context.DeadlineExceeded, + errors.Annotate(context.DeadlineExceeded, "annotated"), + } + for _, cs := range cases { + c.Assert(isRetryableError(cs), check.IsTrue) } } diff --git a/relay/reader/reader.go b/relay/reader/reader.go index 2f8803ceb9..34bf3cf2f7 100644 --- a/relay/reader/reader.go +++ b/relay/reader/reader.go @@ -27,6 +27,13 @@ import ( "github.com/pingcap/dm/pkg/log" ) +// Result represents a read operation result. +type Result struct { + Event *replication.BinlogEvent + ErrIgnorable bool // the error is ignorable + ErrRetryable bool // the error is retryable +} + // Reader reads binlog events from a upstream master server. // The read binlog events should be send to a transformer. // The reader should support: @@ -42,7 +49,7 @@ type Reader interface { // GetEvent gets the binlog event one by one, it will block if no event can be read. // You can pass a context (like Cancel or Timeout) to break the block. - GetEvent(ctx context.Context) (*replication.BinlogEvent, error) + GetEvent(ctx context.Context) (Result, error) } // Config is the configuration used by the Reader. @@ -116,24 +123,26 @@ func (r *reader) Close() error { // GetEvent implements Reader.GetEvent. // If some ignorable error occurred, the returned event and error both are nil. // NOTE: can only close the reader after this returned. -func (r *reader) GetEvent(ctx context.Context) (*replication.BinlogEvent, error) { +func (r *reader) GetEvent(ctx context.Context) (Result, error) { r.mu.RLock() defer r.mu.RUnlock() + var result Result if r.stage != common.StagePrepared { - return nil, errors.Errorf("stage %s, expect %s, please start the reader first", r.stage, common.StagePrepared) + return result, errors.Errorf("stage %s, expect %s, please start the reader first", r.stage, common.StagePrepared) } for { ev, err := r.in.GetEvent(ctx) // NOTE: add retryable error support if needed later if err == nil { - return ev, nil + result.Event = ev } else if isIgnorableError(err) { - log.Warnf("[relay] get event with ignorable error %s", err) - return nil, nil // return without error and also without binlog event + result.ErrIgnorable = true + } else if isRetryableError(err) { + result.ErrRetryable = true } - return nil, errors.Trace(err) + return result, errors.Trace(err) } } diff --git a/relay/reader/reader_test.go b/relay/reader/reader_test.go index 5135faace9..4ea5e246dc 100644 --- a/relay/reader/reader_test.go +++ b/relay/reader/reader_test.go @@ -18,7 +18,7 @@ import ( "testing" "time" - . "github.com/pingcap/check" + "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/siddontang/go-mysql/replication" @@ -26,17 +26,17 @@ import ( ) var ( - _ = Suite(&testReaderSuite{}) + _ = check.Suite(&testReaderSuite{}) ) func TestSuite(t *testing.T) { - TestingT(t) + check.TestingT(t) } type testReaderSuite struct { } -func (t *testReaderSuite) TestInterface(c *C) { +func (t *testReaderSuite) TestInterface(c *check.C) { cases := []*replication.BinlogEvent{ {RawData: []byte{1}}, {RawData: []byte{2}}, @@ -60,18 +60,18 @@ func (t *testReaderSuite) TestInterface(c *C) { t.testInterfaceWithReader(c, r, cases) } -func (t *testReaderSuite) testInterfaceWithReader(c *C, r Reader, cases []*replication.BinlogEvent) { +func (t *testReaderSuite) testInterfaceWithReader(c *check.C, r Reader, cases []*replication.BinlogEvent) { // replace underlying reader with a mock reader for testing concreteR := r.(*reader) - c.Assert(concreteR, NotNil) + c.Assert(concreteR, check.NotNil) mockR := br.NewMockReader() concreteR.in = mockR // start reader err := r.Start() - c.Assert(err, IsNil) + c.Assert(err, check.IsNil) err = r.Start() // call multi times - c.Assert(err, NotNil) + c.Assert(err, check.NotNil) // getEvent by pushing event to mock reader ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) @@ -79,33 +79,33 @@ func (t *testReaderSuite) testInterfaceWithReader(c *C, r Reader, cases []*repli concreteMR := mockR.(*br.MockReader) go func() { for _, cs := range cases { - c.Assert(concreteMR.PushEvent(ctx, cs), IsNil) + c.Assert(concreteMR.PushEvent(ctx, cs), check.IsNil) } }() obtained := make([]*replication.BinlogEvent, 0, len(cases)) for { - ev, err2 := r.GetEvent(ctx) - c.Assert(err2, IsNil) - obtained = append(obtained, ev) + result, err2 := r.GetEvent(ctx) + c.Assert(err2, check.IsNil) + obtained = append(obtained, result.Event) if len(obtained) == len(cases) { break } } - c.Assert(obtained, DeepEquals, cases) + c.Assert(obtained, check.DeepEquals, cases) // close reader err = r.Close() - c.Assert(err, IsNil) + c.Assert(err, check.IsNil) err = r.Close() - c.Assert(err, NotNil) // call multi times + c.Assert(err, check.NotNil) // call multi times // getEvent from a closed reader - ev, err := r.GetEvent(ctx) - c.Assert(err, NotNil) - c.Assert(ev, IsNil) + result, err := r.GetEvent(ctx) + c.Assert(err, check.NotNil) + c.Assert(result.Event, check.IsNil) } -func (t *testReaderSuite) TestGetEventWithError(c *C) { +func (t *testReaderSuite) TestGetEventWithError(c *check.C) { cfg := &Config{ SyncConfig: replication.BinlogSyncerConfig{ ServerID: 101, @@ -116,24 +116,28 @@ func (t *testReaderSuite) TestGetEventWithError(c *C) { r := NewReader(cfg) // replace underlying reader with a mock reader for testing concreteR := r.(*reader) - c.Assert(concreteR, NotNil) + c.Assert(concreteR, check.NotNil) mockR := br.NewMockReader() concreteR.in = mockR errOther := errors.New("other error") in := []error{ - context.DeadlineExceeded, // should be handled in the outer context.Canceled, // ignorable + context.DeadlineExceeded, // retryable errOther, } - expected := []error{ - context.DeadlineExceeded, - nil, // from ignorable - errOther, + expected := []Result{ + { + ErrIgnorable: true, + }, + { + ErrRetryable: true, + }, + {}, } err := r.Start() - c.Assert(err, IsNil) + c.Assert(err, check.IsNil) // getEvent by pushing event to mock reader ctx, cancel := context.WithCancel(context.Background()) @@ -141,19 +145,18 @@ func (t *testReaderSuite) TestGetEventWithError(c *C) { concreteMR := mockR.(*br.MockReader) go func() { for _, cs := range in { - c.Assert(concreteMR.PushError(ctx, cs), IsNil) + c.Assert(concreteMR.PushError(ctx, cs), check.IsNil) } }() - obtained := make([]error, 0, len(expected)) + results := make([]Result, 0, len(expected)) for { - ev, err2 := r.GetEvent(ctx) - err2 = errors.Cause(err2) - c.Assert(ev, IsNil) - obtained = append(obtained, err2) + result, err2 := r.GetEvent(ctx) + c.Assert(err2, check.NotNil) + results = append(results, result) if err2 == errOther { break // all received } } - c.Assert(obtained, DeepEquals, expected) + c.Assert(results, check.DeepEquals, expected) } diff --git a/relay/relay.go b/relay/relay.go index ebc7aaea5f..84a3d3b058 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -267,6 +267,7 @@ func (r *Relay) process(parentCtx context.Context) error { _, lastPos = r.meta.Pos() _, lastGTID = r.meta.GTID() tryReSync = true // used to handle master-slave switch + rResult reader.Result ) defer func() { @@ -280,22 +281,23 @@ func (r *Relay) process(parentCtx context.Context) error { for { ctx, cancel := context.WithTimeout(parentCtx, eventTimeout) readTimer := time.Now() - var e *replication.BinlogEvent - e, err = reader2.GetEvent(ctx) + rResult, err = reader2.GetEvent(ctx) cancel() binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds()) if err != nil { - switch errors.Cause(err) { - case context.Canceled: + if rResult.ErrIgnorable { + log.Infof("relay] get ignorable error %v when reading binlog event", err) return nil - case context.DeadlineExceeded: - log.Infof("[relay] deadline %s exceeded, no binlog event received", eventTimeout) + } else if rResult.ErrRetryable { + log.Infof("relay] get retryable error %v when reading binlog event", err) continue + } + switch err { case replication.ErrChecksumMismatch: relayLogDataCorruptionCounter.Inc() case replication.ErrSyncClosed, replication.ErrNeedSyncAgain: - // do nothing + // do nothing, but the error will be returned default: if utils.IsErrBinlogPurged(err) { if tryReSync && r.cfg.EnableGTID && r.cfg.AutoFixGTID { @@ -310,6 +312,7 @@ func (r *Relay) process(parentCtx context.Context) error { needSavePos := false + e := rResult.Event log.Debugf("[relay] receive binlog event with header %+v", e.Header) switch ev := e.Event.(type) { case *replication.FormatDescriptionEvent: From f22c1051c3c03c0d96e3682aac06116beab5cb0e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 12 Jun 2019 20:04:00 +0800 Subject: [PATCH 03/24] *: use `transformer` and `writer`; TODO, handle duplicated PreviousGTIDsEvent --- relay/relay.go | 286 ++++++-------------------- relay/transformer/transformer.go | 10 +- relay/transformer/transformer_test.go | 24 +-- 3 files changed, 75 insertions(+), 245 deletions(-) diff --git a/relay/relay.go b/relay/relay.go index 84a3d3b058..f31871d8cb 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -14,20 +14,16 @@ package relay import ( - "bytes" "context" "database/sql" - "encoding/binary" "fmt" - "hash/crc32" - "io" "os" - "path" "strings" "sync" "time" "github.com/pingcap/errors" + "github.com/pingcap/parser" "github.com/siddontang/go-mysql/mysql" "github.com/siddontang/go-mysql/replication" "github.com/siddontang/go/sync2" @@ -41,8 +37,9 @@ import ( "github.com/pingcap/dm/pkg/log" pkgstreamer "github.com/pingcap/dm/pkg/streamer" "github.com/pingcap/dm/pkg/utils" - "github.com/pingcap/dm/relay/common" "github.com/pingcap/dm/relay/reader" + "github.com/pingcap/dm/relay/transformer" + "github.com/pingcap/dm/relay/writer" ) var ( @@ -263,11 +260,25 @@ func (r *Relay) process(parentCtx context.Context) error { } }() + writer2, err := r.setUpWriter(parser2) + if err != nil { + return errors.Trace(err) + } + defer func() { + err = writer2.Close() + if err != nil { + log.Errorf("[relay] close binlog event writer error %v", err) + } + }() + + transformer2 := transformer.NewTransformer(parser2) + var ( _, lastPos = r.meta.Pos() _, lastGTID = r.meta.GTID() tryReSync = true // used to handle master-slave switch rResult reader.Result + wResult *writer.Result ) defer func() { @@ -279,6 +290,7 @@ func (r *Relay) process(parentCtx context.Context) error { go r.doIntervalOps(parentCtx) for { + // 1. reader events from upstream server ctx, cancel := context.WithTimeout(parentCtx, eventTimeout) readTimer := time.Now() rResult, err = reader2.GetEvent(ctx) @@ -287,10 +299,10 @@ func (r *Relay) process(parentCtx context.Context) error { if err != nil { if rResult.ErrIgnorable { - log.Infof("relay] get ignorable error %v when reading binlog event", err) + log.Infof("[relay] get ignorable error %v when reading binlog event", err) return nil } else if rResult.ErrRetryable { - log.Infof("relay] get retryable error %v when reading binlog event", err) + log.Infof("[relay] get retryable error %v when reading binlog event", err) continue } switch err { @@ -309,102 +321,50 @@ func (r *Relay) process(parentCtx context.Context) error { return errors.Trace(err) } tryReSync = true - - needSavePos := false - e := rResult.Event log.Debugf("[relay] receive binlog event with header %+v", e.Header) - switch ev := e.Event.(type) { - case *replication.FormatDescriptionEvent: - // FormatDescriptionEvent is the first event in binlog, we will close old one and create a new - exist, err := r.handleFormatDescriptionEvent(lastPos.Name) - if err != nil { - return errors.Trace(err) - } - if exist { - // exists previously, skip - continue - } - case *replication.RotateEvent: - // for RotateEvent, update binlog name - currentPos := mysql.Position{ - Name: string(ev.NextLogName), - Pos: uint32(ev.Position), - } - if currentPos.Name > lastPos.Name { - lastPos = currentPos + + // 2. transform events + tResult := transformer2.Transform(e) + if len(tResult.NextLogName) > 0 && tResult.NextLogName > lastPos.Name { + lastPos = mysql.Position{ + Name: string(tResult.NextLogName), + Pos: uint32(tResult.LogPos), } log.Infof("[relay] rotate to %s", lastPos.String()) - if e.Header.Timestamp == 0 || e.Header.LogPos == 0 { - // skip fake rotate event - continue - } - case *replication.QueryEvent: - // when RawModeEnabled not true, QueryEvent will be parsed - // even for `BEGIN`, we still update pos / GTID - lastPos.Pos = e.Header.LogPos - lastGTID.Set(ev.GSet) // in order to call `ev.GSet`, can not combine QueryEvent and XIDEvent - isDDL := common.CheckIsDDL(string(ev.Query), parser2) - if isDDL { - needSavePos = true // need save pos for DDL - } - case *replication.XIDEvent: - // when RawModeEnabled not true, XIDEvent will be parsed - lastPos.Pos = e.Header.LogPos - lastGTID.Set(ev.GSet) - needSavePos = true // need save pos for XID - case *replication.GenericEvent: - // handle some un-parsed events - switch e.Header.EventType { - case replication.HEARTBEAT_EVENT: - // skip artificial heartbeat event - // ref: https://dev.mysql.com/doc/internals/en/heartbeat-event.html - continue - } - default: - if e.Header.Flags&0x0020 != 0 { - // skip events with LOG_EVENT_ARTIFICIAL_F flag set - // ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html - log.Warnf("[relay] skip artificial event %+v", e.Header) - continue - } + } + if tResult.Ignore { + log.Infof("[relay] ignore event %+v by transformer", e.Header) + continue } - cmp, fSize, err := r.compareEventWithFileSize(e) + // 3. save events into file + writeTimer := time.Now() + log.Debugf("[relay] writing binlog event with header %+v", e.Header) + wResult, err = writer2.WriteEvent(e) if err != nil { relayLogWriteErrorCounter.Inc() return errors.Trace(err) - } - if cmp < 0 { - log.Warnf("[relay] skip obsolete event %+v (with relay file size %d)", e.Header, fSize) + } else if wResult.Ignore { + log.Infof("[relay] ignore event %+v by writer", e.Header) continue - } else if cmp > 0 { - relayLogWriteErrorCounter.Inc() - return errors.Errorf("some events missing, current event %+v, lastPos %v, current GTID %v, relay file size %d", e.Header, lastPos, lastGTID, fSize) } + relayLogWriteDurationHistogram.Observe(time.Since(writeTimer).Seconds()) + // 4. update meta and metrics + needSavePos := tResult.CanSaveGTID if !r.cfg.EnableGTID { // if go-mysql set RawModeEnabled to true // then it will only parse FormatDescriptionEvent and RotateEvent // then check `e.Event.(type)` for `QueryEvent` and `XIDEvent` will never be true // so we need to update pos for all events // and also save pos for all events - lastPos.Pos = e.Header.LogPos + if e.Header.EventType != replication.ROTATE_EVENT { + lastPos.Pos = e.Header.LogPos // for RotateEvent, lastPos updated to the next binlog file's position. + } needSavePos = true } - writeTimer := time.Now() - log.Debugf("[relay] writing binlog event with header %+v", e.Header) - if n, err2 := r.fd.Write(e.RawData); err2 != nil { - relayLogWriteErrorCounter.Inc() - return errors.Trace(err2) - } else if n != len(e.RawData) { - relayLogWriteErrorCounter.Inc() - // FIXME: should we panic here? it seems unreachable - return errors.Trace(io.ErrShortWrite) - } - - relayLogWriteDurationHistogram.Observe(time.Since(writeTimer).Seconds()) relayLogWriteSizeHistogram.Observe(float64(e.Header.EventSize)) relayLogPosGauge.WithLabelValues("relay").Set(float64(lastPos.Pos)) if index, err2 := binlog.GetFilenameIndex(lastPos.Name); err2 != nil { @@ -422,112 +382,6 @@ func (r *Relay) process(parentCtx context.Context) error { } } -// addFlagToEvent adds flag to binlog event -func (r *Relay) addFlagToEvent(e *replication.BinlogEvent, f uint16, eventFormat *replication.FormatDescriptionEvent) { - newF := e.Header.Flags | f - // header structure: - // 4 byte timestamp - // 1 byte event - // 4 byte server-id - // 4 byte event size - // 4 byte log pos - // 2 byte flags - startIdx := 4 + 1 + 4 + 4 + 4 - binary.LittleEndian.PutUint16(e.RawData[startIdx:startIdx+2], newF) - e.Header.Flags = newF - - // re-calculate checksum if needed - if eventFormat == nil || eventFormat.ChecksumAlgorithm != replication.BINLOG_CHECKSUM_ALG_CRC32 { - return - } - calculatedPart := e.RawData[0 : len(e.RawData)-replication.BinlogChecksumLength] - checksum := crc32.ChecksumIEEE(calculatedPart) - binary.LittleEndian.PutUint32(e.RawData[len(e.RawData)-replication.BinlogChecksumLength:], checksum) -} - -// detectGap detects whether gap exists in relay log file -func (r *Relay) detectGap(e *replication.BinlogEvent) (bool, uint32, error) { - if r.fd == nil { - return false, 0, nil - } - fi, err := r.fd.Stat() - if err != nil { - return false, 0, errors.Trace(err) - } - - size := uint32(fi.Size()) - if e.Header.LogPos-e.Header.EventSize > size { - return true, size, nil - } - return false, size, nil -} - -// compareEventWithFileSize compares event's start pos with relay log file size -// returns result: -// -1: less than file size -// 0: equal file size -// 1: greater than file size -func (r *Relay) compareEventWithFileSize(e *replication.BinlogEvent) (result int, fSize uint32, err error) { - if r.fd != nil { - fi, err := r.fd.Stat() - if err != nil { - return 0, 0, errors.Annotatef(err, "compare relay log file size with %+v", e.Header) - } - fSize = uint32(fi.Size()) - startPos := e.Header.LogPos - e.Header.EventSize - if startPos < fSize { - return -1, fSize, nil - } else if startPos > fSize { - return 1, fSize, nil - } - } - return 0, fSize, nil -} - -// handleFormatDescriptionEvent tries to create new binlog file and write binlog header -func (r *Relay) handleFormatDescriptionEvent(filename string) (exist bool, err error) { - if r.fd != nil { - // close the previous binlog log - r.fd.Close() - r.fd = nil - } - - if len(filename) == 0 { - binlogReadErrorCounter.Inc() - return false, errors.NotValidf("write FormatDescriptionEvent with empty binlog filename") - } - - fullPath := path.Join(r.meta.Dir(), filename) - fd, err := os.OpenFile(fullPath, os.O_CREATE|os.O_RDWR, 0644) - if err != nil { - return false, errors.Annotatef(err, "file full path %s", fullPath) - } - r.fd = fd - - // record current active relay log file, and keep it until newer file opened - // when current file's fd closed, we should not reset this, because it may re-open again - r.setActiveRelayLog(filename) - - err = r.writeBinlogHeaderIfNotExists() - if err != nil { - return false, errors.Annotatef(err, "file full path %s", fullPath) - } - - exist, err = r.checkFormatDescriptionEventExists(filename) - if err != nil { - relayLogDataCorruptionCounter.Inc() - return false, errors.Annotatef(err, "file full path %s", fullPath) - } - - ret, err := r.fd.Seek(0, io.SeekEnd) - if err != nil { - return false, errors.Annotatef(err, "file full path %s", fullPath) - } - log.Infof("[relay] %s seek to end (%d)", filename, ret) - - return exist, nil -} - // isNewServer checks whether switched to new server func (r *Relay) isNewServer() (bool, error) { if len(r.meta.UUID()) == 0 { @@ -635,43 +489,6 @@ func (r *Relay) doIntervalOps(ctx context.Context) { } } -func (r *Relay) writeBinlogHeaderIfNotExists() error { - b := make([]byte, binlogHeaderSize) - _, err := r.fd.Read(b) - log.Debugf("[relay] the first 4 bytes are %v", b) - if err == io.EOF || !bytes.Equal(b, replication.BinLogFileHeader) { - _, err = r.fd.Seek(0, io.SeekStart) - if err != nil { - return errors.Trace(err) - } - log.Info("[relay] write binlog header") - // write binlog header fe'bin' - if _, err = r.fd.Write(replication.BinLogFileHeader); err != nil { - return errors.Trace(err) - } - // Note: it's trival to monitor the writing duration and size here. so ignore it. - } else if err != nil { - relayLogDataCorruptionCounter.Inc() - return errors.Trace(err) - } - return nil -} - -func (r *Relay) checkFormatDescriptionEventExists(filename string) (exists bool, err error) { - eof, err2 := replication.NewBinlogParser().ParseSingleEvent(r.fd, func(e *replication.BinlogEvent) error { - return nil - }) - if err2 != nil { - return false, errors.Trace(err2) - } - // FormatDescriptionEvent is the first event and only one FormatDescriptionEvent in a file. - if !eof { - log.Infof("[relay] binlog file %s already has Format_desc event, so ignore it", filename) - return true, nil - } - return false, nil -} - // setUpReader setups the underlying reader used to read binlog events from the upstream master server. func (r *Relay) setUpReader() (reader.Reader, error) { uuid, pos := r.meta.Pos() @@ -696,6 +513,23 @@ func (r *Relay) setUpReader() (reader.Reader, error) { return reader2, nil } +// setUpWriter setups the underlying writer used to writer binlog events into file or other places. +func (r *Relay) setUpWriter(parser2 *parser.Parser) (writer.Writer, error) { + uuid, pos := r.meta.Pos() + cfg := &writer.FileConfig{ + RelayDir: r.meta.Dir(), + Filename: pos.Name, + } + writer2 := writer.NewFileWriter(cfg, parser2) + err := writer2.Start() + if err != nil { + return nil, errors.Annotatef(err, "start writer for UUID %s with config %+v", uuid, cfg) + } + + log.Infof("[relay] started underlying writer for UUID %s with config %+v", uuid, cfg) + return writer2, nil +} + func (r *Relay) masterNode() string { return fmt.Sprintf("%s:%d", r.cfg.From.Host, r.cfg.From.Port) } diff --git a/relay/transformer/transformer.go b/relay/transformer/transformer.go index 2fbea159b8..54ea64f07b 100644 --- a/relay/transformer/transformer.go +++ b/relay/transformer/transformer.go @@ -43,7 +43,7 @@ type Result struct { // NOTE: more features maybe moved from outer into Transformer later. type Transformer interface { // Transform transforms a binlog event. - Transform(e *replication.BinlogEvent) *Result + Transform(e *replication.BinlogEvent) Result } // transformer implements Transformer interface. @@ -59,8 +59,8 @@ func NewTransformer(parser2 *parser.Parser) Transformer { } // Transform implements Transformer.Transform. -func (t *transformer) Transform(e *replication.BinlogEvent) *Result { - result := &Result{ +func (t *transformer) Transform(e *replication.BinlogEvent) Result { + result := Result{ LogPos: e.Header.LogPos, } @@ -68,9 +68,7 @@ func (t *transformer) Transform(e *replication.BinlogEvent) *Result { case *replication.RotateEvent: result.LogPos = uint32(ev.Position) // next event's position result.NextLogName = string(ev.NextLogName) // for RotateEvent, update binlog name - if e.Header.Timestamp == 0 || e.Header.LogPos == 0 { - result.Ignore = true // ignore fake rotate event - } + // NOTE: we need to get the first binlog filename from fake RotateEvent when using auto position case *replication.QueryEvent: // when RawModeEnabled not true, QueryEvent will be parsed. // even for `BEGIN`, we still update pos/GTID, but only save GTID for DDL. diff --git a/relay/transformer/transformer_test.go b/relay/transformer/transformer_test.go index 70e165898a..c72b2fbea9 100644 --- a/relay/transformer/transformer_test.go +++ b/relay/transformer/transformer_test.go @@ -39,7 +39,7 @@ type testTransformerSuite struct { type Case struct { event *replication.BinlogEvent - result *Result + result Result } func (t *testTransformerSuite) TestTransform(c *check.C) { @@ -64,7 +64,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { c.Assert(err, check.IsNil) cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: uint32(position), NextLogName: nextLogName, }, @@ -76,8 +76,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { c.Assert(err, check.IsNil) cases = append(cases, Case{ event: ev, - result: &Result{ - Ignore: true, // ignore fake RotateEvent + result: Result{ LogPos: uint32(position), NextLogName: nextLogName, }, @@ -92,8 +91,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev.Header.LogPos = 0 // set to zero cases = append(cases, Case{ event: ev, - result: &Result{ - Ignore: true, // ignore fake RotateEvent + result: Result{ LogPos: uint32(position), NextLogName: nextLogName, }, @@ -106,7 +104,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev.Event.(*replication.QueryEvent).GSet = gtidSet.Origin() // set GTIDs manually cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: ev.Header.LogPos, GTIDSet: gtidSet.Origin(), CanSaveGTID: true, @@ -119,7 +117,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { c.Assert(err, check.IsNil) cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: ev.Header.LogPos, }, }) @@ -131,7 +129,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev.Event.(*replication.XIDEvent).GSet = gtidSet.Origin() // set GTIDs manually cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: ev.Header.LogPos, GTIDSet: gtidSet.Origin(), CanSaveGTID: true, @@ -142,7 +140,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev = &replication.BinlogEvent{Header: header, Event: &replication.GenericEvent{}} cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: ev.Header.LogPos, }, }) @@ -154,7 +152,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev.Header.EventType = replication.HEARTBEAT_EVENT cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ Ignore: true, LogPos: ev.Header.LogPos, }, @@ -165,7 +163,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { c.Assert(err, check.IsNil) cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ LogPos: ev.Header.LogPos, }, }) @@ -176,7 +174,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { ev.Header.Flags |= artificialFlag cases = append(cases, Case{ event: ev, - result: &Result{ + result: Result{ Ignore: true, LogPos: ev.Header.LogPos, }, From 12d4cac6f9b45f028d258be3712f3468480b3cc6 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Thu, 13 Jun 2019 15:00:14 +0800 Subject: [PATCH 04/24] relay: fix meta update --- relay/relay.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/relay/relay.go b/relay/relay.go index f31871d8cb..f11f3a635e 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -353,6 +353,11 @@ func (r *Relay) process(parentCtx context.Context) error { // 4. update meta and metrics needSavePos := tResult.CanSaveGTID + lastPos.Pos = tResult.LogPos + err = lastGTID.Set(tResult.GTIDSet) + if err != nil { + log.Errorf("[relay] update last GTID set to %v error %v", tResult.GTIDSet, err) + } if !r.cfg.EnableGTID { // if go-mysql set RawModeEnabled to true // then it will only parse FormatDescriptionEvent and RotateEvent @@ -360,7 +365,7 @@ func (r *Relay) process(parentCtx context.Context) error { // so we need to update pos for all events // and also save pos for all events if e.Header.EventType != replication.ROTATE_EVENT { - lastPos.Pos = e.Header.LogPos // for RotateEvent, lastPos updated to the next binlog file's position. + lastPos.Pos = e.Header.LogPos // makfor RotateEvent, lastPos updated to the next binlog file's position. } needSavePos = true } From fece6929a40fe635cb4d73ecb140ae792fd6c756 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 17 Jun 2019 10:39:09 +0800 Subject: [PATCH 05/24] relay: remove some useless code --- relay/util.go | 69 --------------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 relay/util.go diff --git a/relay/util.go b/relay/util.go deleted file mode 100644 index d0bc465b53..0000000000 --- a/relay/util.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2019 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package relay - -import ( - "strings" - - "github.com/pingcap/errors" - "github.com/siddontang/go-mysql/replication" - - "github.com/pingcap/dm/pkg/utils" -) - -// searchLastCompleteEventPos searches the last complete event (end) pos with an incomplete event at the end of the file. -// NOTE: test this when we can generate various types of binlog events -func searchLastCompleteEventPos(file string) (uint32, error) { - var lastPos uint32 - onEventFunc := func(e *replication.BinlogEvent) error { - lastPos = e.Header.LogPos - return nil - } - - parser2 := replication.NewBinlogParser() - parser2.SetVerifyChecksum(true) - parser2.SetUseDecimal(true) - - err := parser2.ParseFile(file, 4, onEventFunc) - if strings.Contains(err.Error(), "err EOF") { - if lastPos > 0 { - return lastPos, nil - } - return 0, errors.Errorf("search for %s end, but not got a valid last pos", file) - } - return 0, errors.Annotatef(ErrNoIncompleteEventFound, "file %s, last pos %d", file, lastPos) -} - -// tryFindGapStartPos tries to find a correct pos can be used to fill the gap. -// support cases: -// ERROR 1236 (HY000): binlog truncated in the middle of event; consider out of disk space on master; the first event 'mysql-bin.000003' at 1000, the last event read from '/var/lib/mysql/mysql-bin.000003' at 123, the last byte read from '/var/lib/mysql/mysql-bin.000003' at 1019. -// ERROR 1236 (HY000): bogus data in log event; the first event 'mysql-bin.000003' at 1000, the last event read from '/var/lib/mysql/mysql-bin.000003' at 123, the last byte read from '/var/lib/mysql/mysql-bin.000003' at 1019. -// if more cases found, add them later. -func tryFindGapStartPos(err error, file string) (uint32, error) { - if !utils.IsErrBinlogPurged(err) { - return 0, errors.NotSupportedf("error code is not 1236") - } - originErr := errors.Cause(err) - errMsg := originErr.Error() - if strings.Contains(errMsg, "binlog truncated in the middle of event") || - strings.Contains(errMsg, "bogus data in log event") { - // try to handle the case where relay log file trimmed (or not wrote successfully) with an incomplete binlog event at the end of the file - pos, err2 := searchLastCompleteEventPos(file) // try search a valid end pos for the file - if err2 != nil { - return 0, errors.Trace(err) - } - return pos, nil - } - return 0, errors.NotFoundf("correct start pos to fill the gap") -} From 481b62441f4b1aec30da2d05334efe5a99f6d985 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 17 Jun 2019 14:22:43 +0800 Subject: [PATCH 06/24] relay: refine code --- relay/relay.go | 51 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/relay/relay.go b/relay/relay.go index f11f3a635e..1089b07cb3 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -33,7 +33,6 @@ import ( "github.com/pingcap/dm/dm/unit" "github.com/pingcap/dm/pkg/binlog" fr "github.com/pingcap/dm/pkg/func-rollback" - "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/log" pkgstreamer "github.com/pingcap/dm/pkg/streamer" "github.com/pingcap/dm/pkg/utils" @@ -102,11 +101,8 @@ type Relay struct { cfg *Config syncerCfg replication.BinlogSyncerConfig - meta Meta - lastSlaveConnectionID uint32 - fd *os.File - closed sync2.AtomicBool - gSetWhenSwitch gtid.Set // GTID set when master-slave switching or the first startup + meta Meta + closed sync2.AtomicBool sync.RWMutex activeRelayLog struct { @@ -273,28 +269,28 @@ func (r *Relay) process(parentCtx context.Context) error { transformer2 := transformer.NewTransformer(parser2) + go r.doIntervalOps(parentCtx) + + return errors.Trace(r.handleEvents(parentCtx, reader2, transformer2, writer2)) +} + +// handleEvents handles binlog events, including: +// 1. read events from upstream +// 2. transform events +// 3. write events into relay log files +// 4. update metadata if needed +func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transformer2 transformer.Transformer, writer2 writer.Writer) error { var ( _, lastPos = r.meta.Pos() _, lastGTID = r.meta.GTID() - tryReSync = true // used to handle master-slave switch - rResult reader.Result - wResult *writer.Result ) - defer func() { - if r.fd != nil { - r.fd.Close() - } - }() - - go r.doIntervalOps(parentCtx) - for { // 1. reader events from upstream server - ctx, cancel := context.WithTimeout(parentCtx, eventTimeout) + ctx2, cancel2 := context.WithTimeout(ctx, eventTimeout) readTimer := time.Now() - rResult, err = reader2.GetEvent(ctx) - cancel() + rResult, err := reader2.GetEvent(ctx2) + cancel2() binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds()) if err != nil { @@ -312,15 +308,12 @@ func (r *Relay) process(parentCtx context.Context) error { // do nothing, but the error will be returned default: if utils.IsErrBinlogPurged(err) { - if tryReSync && r.cfg.EnableGTID && r.cfg.AutoFixGTID { - // TODO: try auto fix GTID - } + // TODO: try auto fix GTID, and can support auto switching between upstream server later. } binlogReadErrorCounter.Inc() } return errors.Trace(err) } - tryReSync = true e := rResult.Event log.Debugf("[relay] receive binlog event with header %+v", e.Header) @@ -341,7 +334,7 @@ func (r *Relay) process(parentCtx context.Context) error { // 3. save events into file writeTimer := time.Now() log.Debugf("[relay] writing binlog event with header %+v", e.Header) - wResult, err = writer2.WriteEvent(e) + wResult, err := writer2.WriteEvent(e) if err != nil { relayLogWriteErrorCounter.Inc() return errors.Trace(err) @@ -428,10 +421,6 @@ func (r *Relay) reSetupMeta() error { } } - // record GTID set when switching or the first startup - _, r.gSetWhenSwitch = r.meta.GTID() - log.Infof("[relay] record previous sub directory end GTID or first startup GTID %s", r.gSetWhenSwitch) - r.updateMetricsRelaySubDirIndex() return nil @@ -546,10 +535,6 @@ func (r *Relay) IsClosed() bool { // stopSync stops syncing, now it used by Close and Pause func (r *Relay) stopSync() { - if r.fd != nil { - r.fd.Close() - r.fd = nil - } if err := r.meta.Flush(); err != nil { log.Errorf("[relay] flush checkpoint error %v", errors.ErrorStack(err)) } From c5460f9b1f9cb2c4e09b716ad9108170010e4ae4 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 17 Jun 2019 17:44:04 +0800 Subject: [PATCH 07/24] relay: test for handling events --- relay/meta_test.go | 11 +- relay/relay.go | 2 +- relay/relay_test.go | 223 ++++++++++++++++++++++++++ relay/transformer/transformer.go | 6 +- relay/transformer/transformer_test.go | 2 +- 5 files changed, 229 insertions(+), 15 deletions(-) create mode 100644 relay/relay_test.go diff --git a/relay/meta_test.go b/relay/meta_test.go index 558901e063..196fdd690a 100644 --- a/relay/meta_test.go +++ b/relay/meta_test.go @@ -17,7 +17,6 @@ import ( "io/ioutil" "os" "strings" - "testing" . "github.com/pingcap/check" "github.com/siddontang/go-mysql/mysql" @@ -25,13 +24,9 @@ import ( "github.com/pingcap/dm/pkg/gtid" ) -var _ = Suite(&testRelaySuite{}) +var _ = Suite(&testMetaSuite{}) -func TestSuite(t *testing.T) { - TestingT(t) -} - -type testRelaySuite struct { +type testMetaSuite struct { } type MetaTestCase struct { @@ -41,7 +36,7 @@ type MetaTestCase struct { gset gtid.Set } -func (r *testRelaySuite) TestLocalMeta(c *C) { +func (r *testMetaSuite) TestLocalMeta(c *C) { dir, err := ioutil.TempDir("", "test_local_meta") c.Assert(err, IsNil) defer os.RemoveAll(dir) diff --git a/relay/relay.go b/relay/relay.go index 1089b07cb3..0e4bbc8bb5 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -358,7 +358,7 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo // so we need to update pos for all events // and also save pos for all events if e.Header.EventType != replication.ROTATE_EVENT { - lastPos.Pos = e.Header.LogPos // makfor RotateEvent, lastPos updated to the next binlog file's position. + lastPos.Pos = e.Header.LogPos // for RotateEvent, lastPos updated to the next binlog file's position. } needSavePos = true } diff --git a/relay/relay_test.go b/relay/relay_test.go new file mode 100644 index 0000000000..339111a379 --- /dev/null +++ b/relay/relay_test.go @@ -0,0 +1,223 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package relay + +import ( + "context" + "testing" + "time" + + . "github.com/pingcap/check" + "github.com/pingcap/errors" + "github.com/pingcap/parser" + gmysql "github.com/siddontang/go-mysql/mysql" + "github.com/siddontang/go-mysql/replication" + + "github.com/pingcap/dm/pkg/binlog/event" + "github.com/pingcap/dm/relay/reader" + "github.com/pingcap/dm/relay/transformer" + "github.com/pingcap/dm/relay/writer" +) + +var _ = Suite(&testRelaySuite{}) + +func TestSuite(t *testing.T) { + TestingT(t) +} + +type testRelaySuite struct { +} + +// mockReader is used only for relay testing. +type mockReader struct { + result reader.Result + err error +} + +func (r *mockReader) Start() error { + return nil +} + +func (r *mockReader) Close() error { + return nil +} + +func (r *mockReader) GetEvent(ctx context.Context) (reader.Result, error) { + select { + case <-ctx.Done(): + return reader.Result{ErrIgnorable: true}, ctx.Err() + default: + } + return r.result, r.err +} + +// mockWriter is used only for relay testing. +type mockWriter struct { + result writer.Result + err error + latestEvent *replication.BinlogEvent +} + +func (w *mockWriter) Start() error { + return nil +} + +func (w *mockWriter) Close() error { + return nil +} + +func (w *mockWriter) Recover() (*writer.RecoverResult, error) { + return nil, nil +} + +func (w *mockWriter) WriteEvent(ev *replication.BinlogEvent) (*writer.Result, error) { + w.latestEvent = ev // hold it + return &w.result, w.err +} + +func (w *mockWriter) Flush() error { + return nil +} + +func (t *testRelaySuite) TestHandleEvent(c *C) { + // NOTE: we can test metrics later. + var ( + reader2 = &mockReader{} + transformer2 = transformer.NewTransformer(parser.New()) + writer2 = &mockWriter{} + relayCfg = &Config{ + RelayDir: c.MkDir(), + Flavor: gmysql.MariaDBFlavor, + } + r = NewRelay(relayCfg).(*Relay) + + eventHeader = &replication.EventHeader{ + Timestamp: uint32(time.Now().Unix()), + ServerID: 11, + } + binlogPos = gmysql.Position{"mysql-bin.666888", 4} + rotateEv, _ = event.GenRotateEvent(eventHeader, 123, []byte(binlogPos.Name), uint64(binlogPos.Pos)) + queryEv, _ = event.GenQueryEvent(eventHeader, 123, 0, 0, 0, nil, nil, []byte("BEGIN")) + ) + // NOTE: we can mock meta later. + c.Assert(r.meta.Load(), IsNil) + c.Assert(r.meta.AddDir("24ecd093-8cec-11e9-aa0d-0242ac170002", nil, nil), IsNil) + + // attach GTID sets to QueryEv + queryEv2 := queryEv.Event.(*replication.QueryEvent) + queryEv2.GSet, _ = gmysql.ParseGTIDSet(relayCfg.Flavor, "1-2-3") + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + + // reader return with ignorable error + reader2.result.ErrIgnorable = true + reader2.err = errors.New("reader error for testing") + // return with `nil` + err := r.handleEvents(ctx, reader2, transformer2, writer2) + c.Assert(err, IsNil) + + // reader return with non-ignorable error + reader2.result.ErrIgnorable = false + // return with the annotated reader error + for _, reader2.err = range []error{ + errors.New("reader error for testing"), + replication.ErrChecksumMismatch, + replication.ErrSyncClosed, + replication.ErrNeedSyncAgain, + } { + err = r.handleEvents(ctx, reader2, transformer2, writer2) + c.Assert(errors.Cause(err), Equals, reader2.err) + } + + // reader return valid event + reader2.err = nil + reader2.result.Event = rotateEv + + // writer return error + writer2.err = errors.New("writer error for testing") + // return with the annotated writer error + err = r.handleEvents(ctx, reader2, transformer2, writer2) + c.Assert(errors.Cause(err), Equals, writer2.err) + + // writer without error + writer2.err = nil + err = r.handleEvents(ctx, reader2, transformer2, writer2) // returned when ctx timeout + c.Assert(err, IsNil) + // check written event + c.Assert(writer2.latestEvent, Equals, reader2.result.Event) + // check meta + _, pos := r.meta.Pos() + _, gs := r.meta.GTID() + c.Assert(pos, DeepEquals, binlogPos) + c.Assert(gs.String(), Equals, "") // no GTID sets in event yet + + ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel2() + + // write a QueryEvent with GTID sets + reader2.result.Event = queryEv + err = r.handleEvents(ctx2, reader2, transformer2, writer2) + c.Assert(err, IsNil) + // check written event + c.Assert(writer2.latestEvent, Equals, reader2.result.Event) + // check meta + _, pos = r.meta.Pos() + _, gs = r.meta.GTID() + c.Assert(pos.Name, Equals, binlogPos.Name) + c.Assert(pos.Pos, Equals, queryEv.Header.LogPos) + c.Assert(gs.Origin(), DeepEquals, queryEv2.GSet) // got GTID sets + + // reader return retryable error + reader2.result.ErrRetryable = true + reader2.err = errors.New("reader error for testing") + ctx3, cancel3 := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel3() + err = r.handleEvents(ctx3, reader2, transformer2, writer2) + c.Assert(err, IsNil) + select { + case <-ctx3.Done(): + default: + c.Fatalf("retryable error for reader not retried") + } + + // transformer return ignorable for the event + reader2.result.ErrIgnorable = false + reader2.err = nil + reader2.result.Event = &replication.BinlogEvent{ + Header: &replication.EventHeader{EventType: replication.HEARTBEAT_EVENT}, + Event: &replication.GenericEvent{}} + ctx4, cancel4 := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel4() + err = r.handleEvents(ctx4, reader2, transformer2, writer2) + c.Assert(err, IsNil) + select { + case <-ctx4.Done(): + default: + c.Fatalf("ignorable event for transformer not ignored") + } + + // writer return ignorable for the event + reader2.result.Event = queryEv + writer2.result.Ignore = true + ctx5, cancel5 := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel5() + err = r.handleEvents(ctx5, reader2, transformer2, writer2) + c.Assert(err, IsNil) + select { + case <-ctx5.Done(): + default: + c.Fatalf("ignorable event for writer not ignored") + } +} diff --git a/relay/transformer/transformer.go b/relay/transformer/transformer.go index 54ea64f07b..2f05bb4b17 100644 --- a/relay/transformer/transformer.go +++ b/relay/transformer/transformer.go @@ -21,10 +21,6 @@ import ( "github.com/pingcap/dm/relay/common" ) -var ( - artificialFlag = uint16(0x0020) // LOG_EVENT_ARTIFICIAL_F -) - // Result represents a transform result. type Result struct { Ignore bool // whether the event should be ignored @@ -90,7 +86,7 @@ func (t *transformer) Transform(e *replication.BinlogEvent) Result { result.Ignore = true } default: - if e.Header.Flags&artificialFlag != 0 { + if e.Header.Flags&replication.LOG_EVENT_ARTIFICIAL_F != 0 { // ignore events with LOG_EVENT_ARTIFICIAL_F flag(0x0020) set // ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html result.Ignore = true diff --git a/relay/transformer/transformer_test.go b/relay/transformer/transformer_test.go index c72b2fbea9..a82dfb6fa3 100644 --- a/relay/transformer/transformer_test.go +++ b/relay/transformer/transformer_test.go @@ -171,7 +171,7 @@ func (t *testTransformerSuite) TestTransform(c *check.C) { // other event type with LOG_EVENT_ARTIFICIAL_F ev, err = event.GenCommonGTIDEvent(mysql.MySQLFlavor, header.ServerID, latestPos, gtidSet) c.Assert(err, check.IsNil) - ev.Header.Flags |= artificialFlag + ev.Header.Flags |= replication.LOG_EVENT_ARTIFICIAL_F cases = append(cases, Case{ event: ev, result: Result{ From dd604c00ebf1727ccdd5b975871577e19cbf49d4 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 18 Jun 2019 15:46:44 +0800 Subject: [PATCH 08/24] relay: move some test only code into _test.go file --- dm/worker/relay_test.go | 105 ++++++++++++++++++++++++++++++++++++++-- relay/relay.go | 93 ----------------------------------- 2 files changed, 100 insertions(+), 98 deletions(-) diff --git a/dm/worker/relay_test.go b/dm/worker/relay_test.go index 25076795c0..fbfdc323e9 100644 --- a/dm/worker/relay_test.go +++ b/dm/worker/relay_test.go @@ -17,20 +17,115 @@ import ( "context" . "github.com/pingcap/check" + "github.com/pingcap/errors" + "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" + pkgstreamer "github.com/pingcap/dm/pkg/streamer" "github.com/pingcap/dm/relay" "github.com/pingcap/dm/relay/purger" - "github.com/pingcap/errors" ) type testRelay struct{} var _ = Suite(&testRelay{}) +/*********** dummy relay log process unit, used only for testing *************/ + +// DummyRelay is a dummy relay +type DummyRelay struct { + initErr error + + processResult pb.ProcessResult + errorInfo *pb.RelayError + reloadErr error +} + +// NewDummyRelay creates an instance of dummy Relay. +func NewDummyRelay(cfg *relay.Config) relay.Process { + return &DummyRelay{} +} + +// Init implements Process interface +func (d *DummyRelay) Init() error { + return d.initErr +} + +// InjectInitError injects init error +func (d *DummyRelay) InjectInitError(err error) { + d.initErr = err +} + +// Process implements Process interface +func (d *DummyRelay) Process(ctx context.Context, pr chan pb.ProcessResult) { + select { + case <-ctx.Done(): + pr <- d.processResult + } +} + +// InjectProcessResult injects process result +func (d *DummyRelay) InjectProcessResult(result pb.ProcessResult) { + d.processResult = result +} + +// SwitchMaster implements Process interface +func (d *DummyRelay) SwitchMaster(ctx context.Context, req *pb.SwitchRelayMasterRequest) error { + return nil +} + +// Migrate implements Process interface +func (d *DummyRelay) Migrate(ctx context.Context, binlogName string, binlogPos uint32) error { + return nil +} + +// ActiveRelayLog implements Process interface +func (d *DummyRelay) ActiveRelayLog() *pkgstreamer.RelayLogInfo { + return nil +} + +// Reload implements Process interface +func (d *DummyRelay) Reload(newCfg *relay.Config) error { + return d.reloadErr +} + +// InjectReloadError injects reload error +func (d *DummyRelay) InjectReloadError(err error) { + d.reloadErr = err +} + +// Update implements Process interface +func (d *DummyRelay) Update(cfg *config.SubTaskConfig) error { + return nil +} + +// Resume implements Process interface +func (d *DummyRelay) Resume(ctx context.Context, pr chan pb.ProcessResult) {} + +// Pause implements Process interface +func (d *DummyRelay) Pause() {} + +// Error implements Process interface +func (d *DummyRelay) Error() interface{} { + return d.errorInfo +} + +// Status implements Process interface +func (d *DummyRelay) Status() interface{} { + return &pb.RelayStatus{ + Stage: pb.Stage_New, + } +} + +// Close implements Process interface +func (d *DummyRelay) Close() {} + +// IsClosed implements Process interface +func (d *DummyRelay) IsClosed() bool { return false } + func (t *testRelay) TestRelay(c *C) { originNewRelay := relay.NewRelay - relay.NewRelay = relay.NewDummyRelay + relay.NewRelay = NewDummyRelay originNewPurger := purger.NewPurger purger.NewPurger = purger.NewDummyPurger defer func() { @@ -62,7 +157,7 @@ func (t *testRelay) testInit(c *C, holder *realRelayHolder) { _, err := holder.Init(nil) c.Assert(err, IsNil) - r, ok := holder.relay.(*relay.DummyRelay) + r, ok := holder.relay.(*DummyRelay) c.Assert(ok, IsTrue) initErr := errors.New("init error") @@ -106,7 +201,7 @@ func (t *testRelay) testStart(c *C, holder *realRelayHolder) { } func (t *testRelay) testClose(c *C, holder *realRelayHolder) { - r, ok := holder.relay.(*relay.DummyRelay) + r, ok := holder.relay.(*DummyRelay) c.Assert(ok, IsTrue) processResult := &pb.ProcessResult{ IsCanceled: true, @@ -191,7 +286,7 @@ func (t *testRelay) testUpdate(c *C, holder *realRelayHolder) { c.Assert(waitRelayStage(holder, originStage, 10), IsTrue) c.Assert(holder.closed.Get(), Equals, closedFalse) - r, ok := holder.relay.(*relay.DummyRelay) + r, ok := holder.relay.(*DummyRelay) c.Assert(ok, IsTrue) err := errors.New("reload error") diff --git a/relay/relay.go b/relay/relay.go index 0e4bbc8bb5..16e49a4a42 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -717,96 +717,3 @@ func (r *Relay) Migrate(ctx context.Context, binlogName string, binlogPos uint32 } return nil } - -/*********** dummy relay log process unit *************/ - -// DummyRelay is a dummy relay -type DummyRelay struct { - initErr error - - processResult pb.ProcessResult - errorInfo *pb.RelayError - reloadErr error -} - -// NewDummyRelay creates an instance of dummy Relay. -func NewDummyRelay(cfg *Config) Process { - return &DummyRelay{} -} - -// Init implements Process interface -func (d *DummyRelay) Init() error { - return d.initErr -} - -// InjectInitError injects init error -func (d *DummyRelay) InjectInitError(err error) { - d.initErr = err -} - -// Process implements Process interface -func (d *DummyRelay) Process(ctx context.Context, pr chan pb.ProcessResult) { - select { - case <-ctx.Done(): - pr <- d.processResult - } -} - -// InjectProcessResult injects process result -func (d *DummyRelay) InjectProcessResult(result pb.ProcessResult) { - d.processResult = result -} - -// SwitchMaster implements Process interface -func (d *DummyRelay) SwitchMaster(ctx context.Context, req *pb.SwitchRelayMasterRequest) error { - return nil -} - -// Migrate implements Process interface -func (d *DummyRelay) Migrate(ctx context.Context, binlogName string, binlogPos uint32) error { - return nil -} - -// ActiveRelayLog implements Process interface -func (d *DummyRelay) ActiveRelayLog() *pkgstreamer.RelayLogInfo { - return nil -} - -// Reload implements Process interface -func (d *DummyRelay) Reload(newCfg *Config) error { - return d.reloadErr -} - -// InjectReloadError injects reload error -func (d *DummyRelay) InjectReloadError(err error) { - d.reloadErr = err -} - -// Update implements Process interface -func (d *DummyRelay) Update(cfg *config.SubTaskConfig) error { - return nil -} - -// Resume implements Process interface -func (d *DummyRelay) Resume(ctx context.Context, pr chan pb.ProcessResult) {} - -// Pause implements Process interface -func (d *DummyRelay) Pause() {} - -// Error implements Process interface -func (d *DummyRelay) Error() interface{} { - return d.errorInfo -} - -// Status implements Process interface -func (d *DummyRelay) Status() interface{} { - return &pb.RelayStatus{ - Stage: pb.Stage_New, - } -} - -// Close implements Process interface -func (d *DummyRelay) Close() {} - -// IsClosed implements Process interface -func (d *DummyRelay) IsClosed() bool { return false } From c314dd42f69e61f69dc34c3ca18aac5cf32c70ee Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 18 Jun 2019 16:18:53 +0800 Subject: [PATCH 09/24] relay: extract and test for `isNewServer` --- relay/relay.go | 19 +---------- relay/util.go | 40 ++++++++++++++++++++++++ relay/util_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 18 deletions(-) create mode 100644 relay/util.go create mode 100644 relay/util_test.go diff --git a/relay/relay.go b/relay/relay.go index 16e49a4a42..0a88fba30c 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -230,7 +230,7 @@ func (r *Relay) process(parentCtx context.Context) error { return errors.Trace(err) } - isNew, err := r.isNewServer() + isNew, err := isNewServer(r.meta.UUID(), r.db, r.cfg.Flavor) if err != nil { return errors.Trace(err) } @@ -380,23 +380,6 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo } } -// isNewServer checks whether switched to new server -func (r *Relay) isNewServer() (bool, error) { - if len(r.meta.UUID()) == 0 { - // no sub dir exists before - return true, nil - } - uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) - if err != nil { - return false, errors.Trace(err) - } - if strings.HasPrefix(r.meta.UUID(), uuid) { - // same server as before - return false, nil - } - return true, nil -} - func (r *Relay) reSetupMeta() error { uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) if err != nil { diff --git a/relay/util.go b/relay/util.go new file mode 100644 index 0000000000..ade8a2ff87 --- /dev/null +++ b/relay/util.go @@ -0,0 +1,40 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package relay + +import ( + "database/sql" + "strings" + + "github.com/pingcap/errors" + + "github.com/pingcap/dm/pkg/utils" +) + +// isNewServer checks whether is connecting to a new server. +func isNewServer(prevUUID string, db *sql.DB, flavor string) (bool, error) { + if len(prevUUID) == 0 { + // no sub dir exists before + return true, nil + } + uuid, err := utils.GetServerUUID(db, flavor) + if err != nil { + return false, errors.Trace(err) + } + if strings.HasPrefix(prevUUID, uuid) { + // same server as before + return false, nil + } + return true, nil +} diff --git a/relay/util_test.go b/relay/util_test.go new file mode 100644 index 0000000000..ca1d853471 --- /dev/null +++ b/relay/util_test.go @@ -0,0 +1,78 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package relay + +import ( + "database/sql" + "fmt" + "os" + "strconv" + + . "github.com/pingcap/check" + gmysql "github.com/siddontang/go-mysql/mysql" + + "github.com/pingcap/dm/pkg/utils" +) + +var _ = Suite(&testUtilSuite{}) + +type testUtilSuite struct { + host string + port int + user string + password string + db *sql.DB +} + +func (t *testUtilSuite) SetUpSuite(c *C) { + t.host = os.Getenv("MYSQL_HOST") + if t.host == "" { + t.host = "127.0.0.1" + } + t.port, _ = strconv.Atoi(os.Getenv("MYSQL_PORT")) + if t.port == 0 { + t.port = 3306 + } + t.user = os.Getenv("MYSQL_USER") + if t.user == "" { + t.user = "root" + } + t.password = os.Getenv("MYSQL_PSWD") + + dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4", t.user, t.password, t.host, t.port) + db, err := sql.Open("mysql", dsn) + c.Assert(err, IsNil) + t.db = db +} + +func (t *testUtilSuite) TestIsNewServer(c *C) { + flavor := gmysql.MySQLFlavor + + // no prevUUID, is new server. + isNew, err := isNewServer("", t.db, flavor) + c.Assert(err, IsNil) + c.Assert(isNew, IsTrue) + + // different server + isNew, err = isNewServer("not-exists-uuid.000001", t.db, flavor) + c.Assert(err, IsNil) + c.Assert(isNew, IsTrue) + + // the same server + currUUID, err := utils.GetServerUUID(t.db, flavor) + c.Assert(err, IsNil) + isNew, err = isNewServer(fmt.Sprintf("%s.000001", currUUID), t.db, flavor) + c.Assert(err, IsNil) + c.Assert(isNew, IsFalse) +} From 92d99833dcc7c7741903cf94c551d2654a185e05 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 18 Jun 2019 19:25:42 +0800 Subject: [PATCH 10/24] relay: test `reSetupMeta` --- relay/relay.go | 1 + relay/relay_test.go | 73 +++++++++++++++++++++++++++++++++++++++++++++ relay/util_test.go | 26 ++-------------- 3 files changed, 76 insertions(+), 24 deletions(-) diff --git a/relay/relay.go b/relay/relay.go index 0a88fba30c..9d3d81eb0a 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -380,6 +380,7 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo } } +// reSetupMeta re-setup the metadata when switching to a new upstream master server. func (r *Relay) reSetupMeta() error { uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) if err != nil { diff --git a/relay/relay_test.go b/relay/relay_test.go index 339111a379..8e9e3a875b 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -15,6 +15,11 @@ package relay import ( "context" + "database/sql" + "fmt" + "os" + "path/filepath" + "strconv" "testing" "time" @@ -25,6 +30,7 @@ import ( "github.com/siddontang/go-mysql/replication" "github.com/pingcap/dm/pkg/binlog/event" + "github.com/pingcap/dm/pkg/utils" "github.com/pingcap/dm/relay/reader" "github.com/pingcap/dm/relay/transformer" "github.com/pingcap/dm/relay/writer" @@ -39,6 +45,25 @@ func TestSuite(t *testing.T) { type testRelaySuite struct { } +func openDBForTest() (*sql.DB, error) { + host := os.Getenv("MYSQL_HOST") + if host == "" { + host = "127.0.0.1" + } + port, _ := strconv.Atoi(os.Getenv("MYSQL_PORT")) + if port == 0 { + port = 3306 + } + user := os.Getenv("MYSQL_USER") + if user == "" { + user = "root" + } + password := os.Getenv("MYSQL_PSWD") + + dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4", user, password, host, port) + return sql.Open("mysql", dsn) +} + // mockReader is used only for relay testing. type mockReader struct { result reader.Result @@ -221,3 +246,51 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { c.Fatalf("ignorable event for writer not ignored") } } + +func (t *testRelaySuite) TestReSetupMeta(c *C) { + var ( + relayCfg = &Config{ + RelayDir: c.MkDir(), + Flavor: gmysql.MySQLFlavor, + } + r = NewRelay(relayCfg).(*Relay) + ) + // empty metadata + c.Assert(r.meta.Load(), IsNil) + t.verifyMetadata(c, r, "", minCheckpoint, "", nil) + + // open connected DB and get its UUID + db, err := openDBForTest() + c.Assert(err, IsNil) + r.db = db + uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) + c.Assert(err, IsNil) + + // re-setup meta with start pos adjusted + r.cfg.EnableGTID = true + r.cfg.BinlogGTID = "24ecd093-8cec-11e9-aa0d-0242ac170002:1-23" + r.cfg.BinLogName = "mysql-bin.000005" + c.Assert(r.reSetupMeta(), IsNil) + uuid001 := fmt.Sprintf("%s.000001", uuid) + t.verifyMetadata(c, r, uuid001, minCheckpoint, r.cfg.BinlogGTID, []string{uuid001}) + + // re-setup meta again, often happen when connecting a server behind a VIP. + c.Assert(r.reSetupMeta(), IsNil) + uuid002 := fmt.Sprintf("%s.000002", uuid) + t.verifyMetadata(c, r, uuid002, minCheckpoint, r.cfg.BinlogGTID, []string{uuid001, uuid002}) + +} + +func (t *testRelaySuite) verifyMetadata(c *C, r *Relay, uuidExpected string, + posExpected gmysql.Position, gsStrExpected string, UUIDsExpected []string) { + uuid, pos := r.meta.Pos() + _, gs := r.meta.GTID() + c.Assert(uuid, Equals, uuidExpected) + c.Assert(pos, DeepEquals, posExpected) + c.Assert(gs.String(), Equals, gsStrExpected) + + indexFile := filepath.Join(r.cfg.RelayDir, utils.UUIDIndexFilename) + UUIDs, err := utils.ParseUUIDIndex(indexFile) + c.Assert(err, IsNil) + c.Assert(UUIDs, DeepEquals, UUIDsExpected) +} diff --git a/relay/util_test.go b/relay/util_test.go index ca1d853471..a993f6ad8a 100644 --- a/relay/util_test.go +++ b/relay/util_test.go @@ -16,9 +16,6 @@ package relay import ( "database/sql" "fmt" - "os" - "strconv" - . "github.com/pingcap/check" gmysql "github.com/siddontang/go-mysql/mysql" @@ -28,30 +25,11 @@ import ( var _ = Suite(&testUtilSuite{}) type testUtilSuite struct { - host string - port int - user string - password string - db *sql.DB + db *sql.DB } func (t *testUtilSuite) SetUpSuite(c *C) { - t.host = os.Getenv("MYSQL_HOST") - if t.host == "" { - t.host = "127.0.0.1" - } - t.port, _ = strconv.Atoi(os.Getenv("MYSQL_PORT")) - if t.port == 0 { - t.port = 3306 - } - t.user = os.Getenv("MYSQL_USER") - if t.user == "" { - t.user = "root" - } - t.password = os.Getenv("MYSQL_PSWD") - - dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4", t.user, t.password, t.host, t.port) - db, err := sql.Open("mysql", dsn) + db, err := openDBForTest() c.Assert(err, IsNil) t.db = db } From 67c2f7c0fc447647a2857e2f72b9f3a55f135653 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 18 Jun 2019 20:09:02 +0800 Subject: [PATCH 11/24] relay: test processing --- relay/relay_test.go | 75 +++++++++++++++++++++++++++++++++++++++++++-- relay/util_test.go | 17 +++++----- 2 files changed, 80 insertions(+), 12 deletions(-) diff --git a/relay/relay_test.go b/relay/relay_test.go index 8e9e3a875b..23dbdbece7 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -17,9 +17,11 @@ import ( "context" "database/sql" "fmt" + "io/ioutil" "os" "path/filepath" "strconv" + "sync" "testing" "time" @@ -29,6 +31,8 @@ import ( gmysql "github.com/siddontang/go-mysql/mysql" "github.com/siddontang/go-mysql/replication" + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/binlog" "github.com/pingcap/dm/pkg/binlog/event" "github.com/pingcap/dm/pkg/utils" "github.com/pingcap/dm/relay/reader" @@ -45,7 +49,7 @@ func TestSuite(t *testing.T) { type testRelaySuite struct { } -func openDBForTest() (*sql.DB, error) { +func getDBConfigForTest() config.DBConfig { host := os.Getenv("MYSQL_HOST") if host == "" { host = "127.0.0.1" @@ -59,8 +63,18 @@ func openDBForTest() (*sql.DB, error) { user = "root" } password := os.Getenv("MYSQL_PSWD") + return config.DBConfig{ + Host: host, + Port: port, + User: user, + Password: password, + } +} + +func openDBForTest() (*sql.DB, error) { + cfg := getDBConfigForTest() - dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4", user, password, host, port) + dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4", cfg.User, cfg.Password, cfg.Host, cfg.Port) return sql.Open("mysql", dsn) } @@ -263,6 +277,10 @@ func (t *testRelaySuite) TestReSetupMeta(c *C) { db, err := openDBForTest() c.Assert(err, IsNil) r.db = db + defer func() { + r.db.Close() + r.db = nil + }() uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) c.Assert(err, IsNil) @@ -294,3 +312,56 @@ func (t *testRelaySuite) verifyMetadata(c *C, r *Relay, uuidExpected string, c.Assert(err, IsNil) c.Assert(UUIDs, DeepEquals, UUIDsExpected) } + +func (t *testRelaySuite) TestProcess(c *C) { + var ( + dbCfg = getDBConfigForTest() + relayCfg = &Config{ + EnableGTID: true, + Flavor: gmysql.MySQLFlavor, + RelayDir: c.MkDir(), + ServerID: 12321, + From: DBConfig{ + Host: dbCfg.Host, + Port: dbCfg.Port, + User: dbCfg.User, + Password: dbCfg.Password, + }, + } + r = NewRelay(relayCfg).(*Relay) + ) + db, err := openDBForTest() + c.Assert(err, IsNil) + r.db = db + defer func() { + r.db.Close() + r.db = nil + }() + + ctx, cancel := context.WithCancel(context.Background()) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + err = r.process(ctx) + c.Assert(err, IsNil) + }() + + time.Sleep(3 * time.Second) // waiting for get events from upstream + cancel() // stop processing + wg.Wait() + + // check whether have binlog file in relay directory + // and check for events already done in `TestHandleEvent` + uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor) + c.Assert(err, IsNil) + files, err := ioutil.ReadDir(filepath.Join(relayCfg.RelayDir, fmt.Sprintf("%s.000001", uuid))) + c.Assert(err, IsNil) + var binlogFileCount int + for _, f := range files { + if binlog.VerifyFilename(f.Name()) { + binlogFileCount++ + } + } + c.Assert(binlogFileCount, Greater, 0) +} diff --git a/relay/util_test.go b/relay/util_test.go index a993f6ad8a..9be72c5f6d 100644 --- a/relay/util_test.go +++ b/relay/util_test.go @@ -14,8 +14,8 @@ package relay import ( - "database/sql" "fmt" + . "github.com/pingcap/check" gmysql "github.com/siddontang/go-mysql/mysql" @@ -25,32 +25,29 @@ import ( var _ = Suite(&testUtilSuite{}) type testUtilSuite struct { - db *sql.DB } -func (t *testUtilSuite) SetUpSuite(c *C) { +func (t *testUtilSuite) TestIsNewServer(c *C) { db, err := openDBForTest() c.Assert(err, IsNil) - t.db = db -} + defer db.Close() -func (t *testUtilSuite) TestIsNewServer(c *C) { flavor := gmysql.MySQLFlavor // no prevUUID, is new server. - isNew, err := isNewServer("", t.db, flavor) + isNew, err := isNewServer("", db, flavor) c.Assert(err, IsNil) c.Assert(isNew, IsTrue) // different server - isNew, err = isNewServer("not-exists-uuid.000001", t.db, flavor) + isNew, err = isNewServer("not-exists-uuid.000001", db, flavor) c.Assert(err, IsNil) c.Assert(isNew, IsTrue) // the same server - currUUID, err := utils.GetServerUUID(t.db, flavor) + currUUID, err := utils.GetServerUUID(db, flavor) c.Assert(err, IsNil) - isNew, err = isNewServer(fmt.Sprintf("%s.000001", currUUID), t.db, flavor) + isNew, err = isNewServer(fmt.Sprintf("%s.000001", currUUID), db, flavor) c.Assert(err, IsNil) c.Assert(isNew, IsFalse) } From f255895ba4a95bbbcfffa69b8ca9907e7c72dfb1 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 18 Jun 2019 20:15:33 +0800 Subject: [PATCH 12/24] relay: try fix CI --- relay/relay_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/relay_test.go b/relay/relay_test.go index 23dbdbece7..3ec6895917 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -317,7 +317,7 @@ func (t *testRelaySuite) TestProcess(c *C) { var ( dbCfg = getDBConfigForTest() relayCfg = &Config{ - EnableGTID: true, + EnableGTID: false, // position mode, so auto-positioning can work Flavor: gmysql.MySQLFlavor, RelayDir: c.MkDir(), ServerID: 12321, From 93ec22e7acff92dad4a892cc1af9d404d6ac561a Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 19 Jun 2019 10:25:16 +0800 Subject: [PATCH 13/24] reader: try fix CI --- pkg/binlog/reader/tcp_test.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/binlog/reader/tcp_test.go b/pkg/binlog/reader/tcp_test.go index 411c3fa761..fddcb131bd 100644 --- a/pkg/binlog/reader/tcp_test.go +++ b/pkg/binlog/reader/tcp_test.go @@ -53,6 +53,8 @@ type testTCPReaderSuite struct { user string password string db *sql.DB + initPos gmysql.Position + initGs gtid.Set } func (t *testTCPReaderSuite) SetUpSuite(c *C) { @@ -95,6 +97,9 @@ func (t *testTCPReaderSuite) setUpData(c *C) { _, err = t.db.Exec(query) c.Assert(err, IsNil) + // save init position and GTID sets + t.initPos, t.initGs, err = utils.GetMasterStatus(t.db, flavor) + // execute some SQL statements to generate binlog events. query = fmt.Sprintf("CREATE DATABASE `%s`", dbName) _, err = t.db.Exec(query) @@ -122,7 +127,7 @@ func (t *testTCPReaderSuite) TestSyncPos(c *C) { UseDecimal: true, VerifyChecksum: true, } - pos gmysql.Position // empty position + pos = t.initPos // use the initial position ) // the first reader @@ -232,9 +237,8 @@ func (t *testTCPReaderSuite) TestSyncGTID(c *C) { err = r.StartSyncByGTID(gSet) c.Assert(err, NotNil) - // empty GTID set - gSet, err = gtid.ParserGTID(flavor, "") - c.Assert(err, IsNil) + // use the initial position + gSet = t.initGs // prepare err = r.StartSyncByGTID(gSet) From 9f22aaae51237578b331f30d030742a5c0192fc5 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 19 Jun 2019 10:54:07 +0800 Subject: [PATCH 14/24] reader: try fix CI --- pkg/binlog/reader/tcp_test.go | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pkg/binlog/reader/tcp_test.go b/pkg/binlog/reader/tcp_test.go index fddcb131bd..aab1d9e624 100644 --- a/pkg/binlog/reader/tcp_test.go +++ b/pkg/binlog/reader/tcp_test.go @@ -53,8 +53,6 @@ type testTCPReaderSuite struct { user string password string db *sql.DB - initPos gmysql.Position - initGs gtid.Set } func (t *testTCPReaderSuite) SetUpSuite(c *C) { @@ -92,13 +90,23 @@ func (t *testTCPReaderSuite) setUpData(c *C) { query := fmt.Sprintf("DROP DATABASE `%s`", dbName) _, err := t.db.Exec(query) - // delete previous binlog files/events. - query = "RESET MASTER" - _, err = t.db.Exec(query) - c.Assert(err, IsNil) + maxRetryCount := 5 + for i := 0; i < maxRetryCount; i++ { + // delete previous binlog files/events. if other test cases writing events, they may be failed. + query = "RESET MASTER" + _, err = t.db.Exec(query) + c.Assert(err, IsNil) - // save init position and GTID sets - t.initPos, t.initGs, err = utils.GetMasterStatus(t.db, flavor) + // check whether other test cases have wrote any events. + time.Sleep(time.Second) + _, gs, err2 := utils.GetMasterStatus(t.db, flavor) + c.Assert(err2, IsNil) + if len(gs.String()) > 0 { + time.Sleep(5 * time.Second) // some events exist now, try again later. + } else { + break + } + } // execute some SQL statements to generate binlog events. query = fmt.Sprintf("CREATE DATABASE `%s`", dbName) @@ -127,7 +135,7 @@ func (t *testTCPReaderSuite) TestSyncPos(c *C) { UseDecimal: true, VerifyChecksum: true, } - pos = t.initPos // use the initial position + pos gmysql.Position // empty position ) // the first reader @@ -237,8 +245,9 @@ func (t *testTCPReaderSuite) TestSyncGTID(c *C) { err = r.StartSyncByGTID(gSet) c.Assert(err, NotNil) - // use the initial position - gSet = t.initGs + // empty GTID set + gSet, err = gtid.ParserGTID(flavor, "") + c.Assert(err, IsNil) // prepare err = r.StartSyncByGTID(gSet) From 9e108d7fd422bbd354b9830d06d5859c724aaa82 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 19 Jun 2019 11:09:51 +0800 Subject: [PATCH 15/24] relay: try fix CI --- relay/relay_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/relay/relay_test.go b/relay/relay_test.go index 3ec6895917..85d48311c5 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -344,7 +344,10 @@ func (t *testRelaySuite) TestProcess(c *C) { go func() { defer wg.Done() err = r.process(ctx) - c.Assert(err, IsNil) + if !utils.IsErrBinlogPurged(err) { + // we can tolerate `ERROR 1236` caused by `RESET MASTER` in other test cases. + c.Assert(err, IsNil) + } }() time.Sleep(3 * time.Second) // waiting for get events from upstream From 8e83cf1f640a61880ed394920b040d0490a83cc9 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 19 Jun 2019 11:29:38 +0800 Subject: [PATCH 16/24] *: writer return value rather than pointer --- relay/relay_test.go | 8 ++--- relay/writer/file.go | 76 +++++++++++++++++++-------------------- relay/writer/file_test.go | 66 +++++++++------------------------- relay/writer/writer.go | 4 +-- 4 files changed, 61 insertions(+), 93 deletions(-) diff --git a/relay/relay_test.go b/relay/relay_test.go index 85d48311c5..5d09924f9e 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -116,13 +116,13 @@ func (w *mockWriter) Close() error { return nil } -func (w *mockWriter) Recover() (*writer.RecoverResult, error) { - return nil, nil +func (w *mockWriter) Recover() (writer.RecoverResult, error) { + return writer.RecoverResult{}, nil } -func (w *mockWriter) WriteEvent(ev *replication.BinlogEvent) (*writer.Result, error) { +func (w *mockWriter) WriteEvent(ev *replication.BinlogEvent) (writer.Result, error) { w.latestEvent = ev // hold it - return &w.result, w.err + return w.result, w.err } func (w *mockWriter) Flush() error { diff --git a/relay/writer/file.go b/relay/writer/file.go index 018fd6f620..1308fd1d92 100644 --- a/relay/writer/file.go +++ b/relay/writer/file.go @@ -97,24 +97,24 @@ func (w *FileWriter) Close() error { } // Recover implements Writer.Recover. -func (w *FileWriter) Recover() (*RecoverResult, error) { +func (w *FileWriter) Recover() (RecoverResult, error) { w.mu.Lock() defer w.mu.Unlock() if w.stage != common.StagePrepared { - return nil, errors.Errorf("stage %s, expect %s, please start the writer first", w.stage, common.StagePrepared) + return RecoverResult{}, errors.Errorf("stage %s, expect %s, please start the writer first", w.stage, common.StagePrepared) } return w.doRecovering() } // WriteEvent implements Writer.WriteEvent. -func (w *FileWriter) WriteEvent(ev *replication.BinlogEvent) (*Result, error) { +func (w *FileWriter) WriteEvent(ev *replication.BinlogEvent) (Result, error) { w.mu.Lock() defer w.mu.Unlock() if w.stage != common.StagePrepared { - return nil, errors.Errorf("stage %s, expect %s, please start the writer first", w.stage, common.StagePrepared) + return Result{}, errors.Errorf("stage %s, expect %s, please start the writer first", w.stage, common.StagePrepared) } switch ev.Event.(type) { @@ -157,19 +157,19 @@ func (w *FileWriter) offset() int64 { // 2. open/create a new binlog file // 3. write the binlog file header if not exists // 4. write the FormatDescriptionEvent if not exists one -func (w *FileWriter) handleFormatDescriptionEvent(ev *replication.BinlogEvent) (*Result, error) { +func (w *FileWriter) handleFormatDescriptionEvent(ev *replication.BinlogEvent) (Result, error) { // close the previous binlog file if w.out != nil { log.Infof("[relay] closing previous underlying binlog writer with status %v", w.out.Status()) err := w.out.Close() if err != nil { - return nil, errors.Annotate(err, "close previous underlying binlog writer") + return Result{}, errors.Annotate(err, "close previous underlying binlog writer") } } // verify filename if !binlog.VerifyFilename(w.filename.Get()) { - return nil, errors.NotValidf("binlog filename %s", w.filename.Get()) + return Result{}, errors.NotValidf("binlog filename %s", w.filename.Get()) } // open/create a new binlog file @@ -180,7 +180,7 @@ func (w *FileWriter) handleFormatDescriptionEvent(ev *replication.BinlogEvent) ( out := bw.NewFileWriter(outCfg) err := out.Start() if err != nil { - return nil, errors.Annotatef(err, "start underlying binlog writer for %s", filename) + return Result{}, errors.Annotatef(err, "start underlying binlog writer for %s", filename) } w.out = out.(*bw.FileWriter) log.Infof("[relay] open underlying binlog writer with status %v", w.out.Status()) @@ -188,26 +188,26 @@ func (w *FileWriter) handleFormatDescriptionEvent(ev *replication.BinlogEvent) ( // write the binlog file header if not exists exist, err := checkBinlogHeaderExist(filename) if err != nil { - return nil, errors.Annotatef(err, "check binlog file header for %s", filename) + return Result{}, errors.Annotatef(err, "check binlog file header for %s", filename) } else if !exist { err = w.out.Write(replication.BinLogFileHeader) if err != nil { - return nil, errors.Annotatef(err, "write binlog file header for %s", filename) + return Result{}, errors.Annotatef(err, "write binlog file header for %s", filename) } } // write the FormatDescriptionEvent if not exists one exist, err = checkFormatDescriptionEventExist(filename) if err != nil { - return nil, errors.Annotatef(err, "check FormatDescriptionEvent for %s", filename) + return Result{}, errors.Annotatef(err, "check FormatDescriptionEvent for %s", filename) } else if !exist { err = w.out.Write(ev.RawData) if err != nil { - return nil, errors.Annotatef(err, "write FormatDescriptionEvent %+v for %s", ev.Header, filename) + return Result{}, errors.Annotatef(err, "write FormatDescriptionEvent %+v for %s", ev.Header, filename) } } - return &Result{ + return Result{ Ignore: exist, // ignore if exists }, nil } @@ -220,10 +220,10 @@ func (w *FileWriter) handleFormatDescriptionEvent(ev *replication.BinlogEvent) ( // NOTE: we do not create a new binlog file when received a RotateEvent, // instead, we create a new binlog file when received a FormatDescriptionEvent. // because a binlog file without any events has no meaning. -func (w *FileWriter) handleRotateEvent(ev *replication.BinlogEvent) (result *Result, err error) { +func (w *FileWriter) handleRotateEvent(ev *replication.BinlogEvent) (result Result, err error) { rotateEv, ok := ev.Event.(*replication.RotateEvent) if !ok { - return nil, errors.NotValidf("except RotateEvent, but got %+v", ev.Header) + return result, errors.NotValidf("except RotateEvent, but got %+v", ev.Header) } var currFile = w.filename.Get() @@ -244,27 +244,27 @@ func (w *FileWriter) handleRotateEvent(ev *replication.BinlogEvent) (result *Res // write the RotateEvent if not fake if ev.Header.Timestamp == 0 || ev.Header.LogPos == 0 { // skip fake rotate event - return &Result{ + return Result{ Ignore: true, }, nil } else if w.out == nil { // if not open a binlog file yet, then non-fake RotateEvent can't be handled - return nil, errors.Errorf("non-fake RotateEvent %+v received, but no binlog file opened", ev.Header) + return result, errors.Errorf("non-fake RotateEvent %+v received, but no binlog file opened", ev.Header) } result, err = w.handlePotentialHoleOrDuplicate(ev) if err != nil { - return nil, errors.Trace(err) + return result, errors.Trace(err) } else if result.Ignore { return result, nil } err = w.out.Write(ev.RawData) if err != nil { - return nil, errors.Annotatef(err, "write RotateEvent %+v for %s", ev.Header, filepath.Join(w.cfg.RelayDir, currFile)) + return result, errors.Annotatef(err, "write RotateEvent %+v for %s", ev.Header, filepath.Join(w.cfg.RelayDir, currFile)) } - return &Result{ + return Result{ Ignore: false, }, nil } @@ -273,27 +273,27 @@ func (w *FileWriter) handleRotateEvent(ev *replication.BinlogEvent) (result *Res // 1. handle a potential hole if exists // 2. handle any duplicate events if exist // 3. write the non-duplicate event -func (w *FileWriter) handleEventDefault(ev *replication.BinlogEvent) (*Result, error) { +func (w *FileWriter) handleEventDefault(ev *replication.BinlogEvent) (Result, error) { result, err := w.handlePotentialHoleOrDuplicate(ev) if err != nil { - return nil, errors.Trace(err) + return Result{}, errors.Trace(err) } else if result.Ignore { return result, nil } // write the non-duplicate event err = w.out.Write(ev.RawData) - return &Result{ + return Result{ Ignore: false, }, errors.Annotatef(err, "write event %+v", ev.Header) } // handlePotentialHoleOrDuplicate combines handleFileHoleExist and handleDuplicateEventsExist. -func (w *FileWriter) handlePotentialHoleOrDuplicate(ev *replication.BinlogEvent) (*Result, error) { +func (w *FileWriter) handlePotentialHoleOrDuplicate(ev *replication.BinlogEvent) (Result, error) { // handle a potential hole mayDuplicate, err := w.handleFileHoleExist(ev) if err != nil { - return nil, errors.Annotatef(err, "handle a potential hole in %s before %+v", + return Result{}, errors.Annotatef(err, "handle a potential hole in %s before %+v", w.filename.Get(), ev.Header) } @@ -301,7 +301,7 @@ func (w *FileWriter) handlePotentialHoleOrDuplicate(ev *replication.BinlogEvent) // handle any duplicate events if exist result, err2 := w.handleDuplicateEventsExist(ev) if err2 != nil { - return nil, errors.Annotatef(err2, "handle a potential duplicate event %+v in %s", + return Result{}, errors.Annotatef(err2, "handle a potential duplicate event %+v in %s", ev.Header, w.filename.Get()) } if result.Ignore { @@ -310,7 +310,7 @@ func (w *FileWriter) handlePotentialHoleOrDuplicate(ev *replication.BinlogEvent) } } - return &Result{ + return Result{ Ignore: false, }, nil } @@ -354,16 +354,16 @@ func (w *FileWriter) handleFileHoleExist(ev *replication.BinlogEvent) (bool, err } // handleDuplicateEventsExist tries to handle a potential duplicate event in the binlog file. -func (w *FileWriter) handleDuplicateEventsExist(ev *replication.BinlogEvent) (*Result, error) { +func (w *FileWriter) handleDuplicateEventsExist(ev *replication.BinlogEvent) (Result, error) { filename := filepath.Join(w.cfg.RelayDir, w.filename.Get()) duplicate, err := checkIsDuplicateEvent(filename, ev) if err != nil { - return nil, errors.Annotatef(err, "check event %+v whether duplicate in %s", ev.Header, filename) + return Result{}, errors.Annotatef(err, "check event %+v whether duplicate in %s", ev.Header, filename) } else if duplicate { log.Infof("[relay] event %+v is duplicate in %s", ev.Header, w.filename.Get()) } - return &Result{ + return Result{ Ignore: duplicate, }, nil } @@ -376,41 +376,41 @@ func (w *FileWriter) handleDuplicateEventsExist(ev *replication.BinlogEvent) (*R // 3. truncate any incomplete events/transactions // now, we think a transaction finished if we received a XIDEvent or DDL in QueryEvent // NOTE: handle cases when file size > 4GB -func (w *FileWriter) doRecovering() (*RecoverResult, error) { +func (w *FileWriter) doRecovering() (RecoverResult, error) { filename := filepath.Join(w.cfg.RelayDir, w.filename.Get()) // get latest pos/GTID set for all completed transactions from the file latestPos, latestGTIDs, err := getTxnPosGTIDs(filename, w.parser) if err != nil { - return nil, errors.Annotatef(err, "get latest pos/GTID set from %s", filename) + return RecoverResult{}, errors.Annotatef(err, "get latest pos/GTID set from %s", filename) } // in most cases, we think the file is fine, so compare the size is simpler. fs, err := os.Stat(filename) if err != nil { - return nil, errors.Annotatef(err, "get stat for %s", filename) + return RecoverResult{}, errors.Annotatef(err, "get stat for %s", filename) } if fs.Size() == latestPos { - return &RecoverResult{ + return RecoverResult{ Recovered: false, // no recovering for the file LatestPos: gmysql.Position{Name: w.filename.Get(), Pos: uint32(latestPos)}, LatestGTIDs: latestGTIDs, }, nil } else if fs.Size() < latestPos { - return nil, errors.Errorf("latest pos %d greater than file size %d, should not happen", latestPos, fs.Size()) + return RecoverResult{}, errors.Errorf("latest pos %d greater than file size %d, should not happen", latestPos, fs.Size()) } // truncate the file f, err := os.OpenFile(filename, os.O_WRONLY, 0644) if err != nil { - return nil, errors.Annotatef(err, "open %s", filename) + return RecoverResult{}, errors.Annotatef(err, "open %s", filename) } defer f.Close() err = f.Truncate(latestPos) if err != nil { - return nil, errors.Annotatef(err, "truncate %s to %d", filename, latestPos) + return RecoverResult{}, errors.Annotatef(err, "truncate %s to %d", filename, latestPos) } - return &RecoverResult{ + return RecoverResult{ Recovered: true, LatestPos: gmysql.Position{Name: w.filename.Get(), Pos: uint32(latestPos)}, LatestGTIDs: latestGTIDs, diff --git a/relay/writer/file_test.go b/relay/writer/file_test.go index 620b2a4e13..859f75fb9f 100644 --- a/relay/writer/file_test.go +++ b/relay/writer/file_test.go @@ -67,12 +67,10 @@ func (t *testFileWriterSuite) TestInterfaceMethods(c *check.C) { c.Assert(w, check.NotNil) // not prepared - rres, err := w.Recover() + _, err := w.Recover() c.Assert(err, check.ErrorMatches, fmt.Sprintf(".*%s.*", common.StageNew)) - c.Assert(rres, check.IsNil) - res, err := w.WriteEvent(ev) + _, err = w.WriteEvent(ev) c.Assert(err, check.ErrorMatches, fmt.Sprintf(".*%s.*", common.StageNew)) - c.Assert(res, check.IsNil) err = w.Flush() c.Assert(err, check.ErrorMatches, fmt.Sprintf(".*%s.*", common.StageNew)) @@ -86,14 +84,13 @@ func (t *testFileWriterSuite) TestInterfaceMethods(c *check.C) { c.Assert(err, check.ErrorMatches, ".*no underlying writer opened.*") // recover - rres, err = w.Recover() + _, err = w.Recover() c.Assert(err, check.ErrorMatches, ".*no such file or directory.*") - c.Assert(rres, check.IsNil) // write event - res, err = w.WriteEvent(ev) + res, err := w.WriteEvent(ev) c.Assert(err, check.IsNil) - c.Assert(res, check.NotNil) + c.Assert(res.Ignore, check.IsFalse) // flush buffered data c.Assert(w.Flush(), check.IsNil) @@ -120,45 +117,41 @@ func (t *testFileWriterSuite) TestRelayDir(c *check.C) { w1 := NewFileWriter(cfg, t.parser) defer w1.Close() c.Assert(w1.Start(), check.IsNil) - result, err := w1.WriteEvent(ev) + _, err = w1.WriteEvent(ev) c.Assert(err, check.ErrorMatches, ".*not valid.*") - c.Assert(result, check.IsNil) // invalid dir cfg.RelayDir = "invalid\x00path" w2 := NewFileWriter(cfg, t.parser) defer w2.Close() c.Assert(w2.Start(), check.IsNil) - result, err = w2.WriteEvent(ev) + _, err = w2.WriteEvent(ev) c.Assert(err, check.ErrorMatches, ".*not valid.*") - c.Assert(result, check.IsNil) // valid directory, but no filename specified cfg.RelayDir = c.MkDir() w3 := NewFileWriter(cfg, t.parser) defer w3.Close() c.Assert(w3.Start(), check.IsNil) - result, err = w3.WriteEvent(ev) + _, err = w3.WriteEvent(ev) c.Assert(err, check.ErrorMatches, ".*not valid.*") - c.Assert(result, check.IsNil) // valid directory, but invalid filename cfg.Filename = "test-mysql-bin.666abc" w4 := NewFileWriter(cfg, t.parser) defer w4.Close() c.Assert(w4.Start(), check.IsNil) - result, err = w4.WriteEvent(ev) + _, err = w4.WriteEvent(ev) c.Assert(err, check.ErrorMatches, ".*not valid.*") - c.Assert(result, check.IsNil) // valid directory, valid filename cfg.Filename = "test-mysql-bin.000001" w5 := NewFileWriter(cfg, t.parser) defer w5.Close() c.Assert(w5.Start(), check.IsNil) - result, err = w5.WriteEvent(ev) + result, err := w5.WriteEvent(ev) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) + c.Assert(result.Ignore, check.IsFalse) } func (t *testFileWriterSuite) TestFormatDescriptionEvent(c *check.C) { @@ -183,7 +176,6 @@ func (t *testFileWriterSuite) TestFormatDescriptionEvent(c *check.C) { c.Assert(w.Start(), check.IsNil) result, err := w.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) fileSize := int64(len(replication.BinLogFileHeader) + len(formatDescEv.RawData)) t.verifyFilenameOffset(c, w, cfg.Filename, fileSize) @@ -192,7 +184,6 @@ func (t *testFileWriterSuite) TestFormatDescriptionEvent(c *check.C) { // write FormatDescriptionEvent again, ignore result, err = w.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsTrue) t.verifyFilenameOffset(c, w, cfg.Filename, fileSize) @@ -201,7 +192,6 @@ func (t *testFileWriterSuite) TestFormatDescriptionEvent(c *check.C) { c.Assert(err, check.IsNil) result, err = w.WriteEvent(queryEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) fileSize += int64(len(queryEv.RawData)) t.verifyFilenameOffset(c, w, cfg.Filename, fileSize) @@ -210,7 +200,6 @@ func (t *testFileWriterSuite) TestFormatDescriptionEvent(c *check.C) { // write FormatDescriptionEvent again, ignore result, err = w.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsTrue) t.verifyFilenameOffset(c, w, cfg.Filename, fileSize) @@ -283,23 +272,20 @@ func (t *testFileWriterSuite) TestRotateEventWithFormatDescriptionEvent(c *check w1 := NewFileWriter(cfg, t.parser) defer w1.Close() c.Assert(w1.Start(), check.IsNil) - result, err := w1.WriteEvent(rotateEv) + _, err = w1.WriteEvent(rotateEv) c.Assert(err, check.ErrorMatches, ".*no binlog file opened.*") - c.Assert(result, check.IsNil) // 2. fake RotateEvent before FormatDescriptionEvent cfg.RelayDir = c.MkDir() // use a new relay directory w2 := NewFileWriter(cfg, t.parser) defer w2.Close() c.Assert(w2.Start(), check.IsNil) - result, err = w2.WriteEvent(fakeRotateEv) + result, err := w2.WriteEvent(fakeRotateEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsTrue) // ignore fake RotateEvent result, err = w2.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) fileSize := int64(len(replication.BinLogFileHeader) + len(formatDescEv.RawData)) @@ -322,12 +308,10 @@ func (t *testFileWriterSuite) TestRotateEventWithFormatDescriptionEvent(c *check defer w3.Close() c.Assert(w3.Start(), check.IsNil) result, err = w3.WriteEvent(formatDescEv) - c.Assert(err, check.IsNil) c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) result, err = w3.WriteEvent(fakeRotateEv) - c.Assert(err, check.IsNil) c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsTrue) @@ -349,27 +333,23 @@ func (t *testFileWriterSuite) TestRotateEventWithFormatDescriptionEvent(c *check defer w4.Close() c.Assert(w4.Start(), check.IsNil) result, err = w4.WriteEvent(formatDescEv) - c.Assert(err, check.IsNil) c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) // try to write a rotateEv with hole exists - result, err = w4.WriteEvent(holeRotateEv) + _, err = w4.WriteEvent(holeRotateEv) c.Assert(err, check.ErrorMatches, ".*required dummy event size.*is too small.*") - c.Assert(result, check.IsNil) result, err = w4.WriteEvent(rotateEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) fileSize += int64(len(rotateEv.RawData)) t.verifyFilenameOffset(c, w4, nextFilename, fileSize) // write again, duplicate, but we already rotated and new binlog file not created - result, err = w4.WriteEvent(rotateEv) + _, err = w4.WriteEvent(rotateEv) c.Assert(err, check.ErrorMatches, ".*no such file or directory.*") - c.Assert(result, check.IsNil) // cfg.Filename should contain both one FormatDescriptionEvent and one RotateEvent, next file should be empty filename1 = filepath.Join(cfg.RelayDir, cfg.Filename) @@ -442,7 +422,6 @@ func (t *testFileWriterSuite) TestWriteMultiEvents(c *check.C) { for _, ev := range allEvents { result, err2 := w.WriteEvent(ev) c.Assert(err2, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) // no event is ignored } @@ -478,16 +457,14 @@ func (t *testFileWriterSuite) TestHandleFileHoleExist(c *check.C) { // write the FormatDescriptionEvent, no hole exists result, err := w.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) // hole exits, but the size is too small, invalid latestPos = formatDescEv.Header.LogPos + event.MinUserVarEventLen - 1 queryEv, err := event.GenQueryEvent(header, latestPos, 0, 0, 0, nil, []byte("schema"), []byte("BEGIN")) c.Assert(err, check.IsNil) - result, err = w.WriteEvent(queryEv) + _, err = w.WriteEvent(queryEv) c.Assert(err, check.ErrorMatches, ".*generate dummy event.*") - c.Assert(result, check.IsNil) // hole exits, and the size is enough latestPos = formatDescEv.Header.LogPos + event.MinUserVarEventLen @@ -495,7 +472,6 @@ func (t *testFileWriterSuite) TestHandleFileHoleExist(c *check.C) { c.Assert(err, check.IsNil) result, err = w.WriteEvent(queryEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) fileSize := int64(queryEv.Header.LogPos) t.verifyFilenameOffset(c, w, cfg.Filename, fileSize) @@ -547,7 +523,6 @@ func (t *testFileWriterSuite) TestHandleDuplicateEventsExist(c *check.C) { c.Assert(err, check.IsNil) result, err := w.WriteEvent(formatDescEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) latestPos = formatDescEv.Header.LogPos @@ -556,22 +531,19 @@ func (t *testFileWriterSuite) TestHandleDuplicateEventsExist(c *check.C) { c.Assert(err, check.IsNil) result, err = w.WriteEvent(queryEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsFalse) // write the QueryEvent again, duplicate result, err = w.WriteEvent(queryEv) c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Ignore, check.IsTrue) // write a start/end pos mismatched event latestPos-- queryEv, err = event.GenQueryEvent(header, latestPos, 0, 0, 0, nil, []byte("schema"), []byte("BEGIN")) c.Assert(err, check.IsNil) - result, err = w.WriteEvent(queryEv) + _, err = w.WriteEvent(queryEv) c.Assert(err, check.ErrorMatches, ".*handle a potential duplicate event.*") - c.Assert(result, check.IsNil) } func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { @@ -617,7 +589,6 @@ func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { // try recover, but in fact do nothing result, err := w.Recover() c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Recovered, check.IsFalse) c.Assert(result.LatestPos, check.DeepEquals, expectedPos) c.Assert(result.LatestGTIDs, check.DeepEquals, expectedGTIDs) @@ -648,7 +619,6 @@ func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { // try recover, truncate the incomplete event result, err = w.Recover() c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Recovered, check.IsTrue) c.Assert(result.LatestPos, check.DeepEquals, expectedPos) c.Assert(result.LatestGTIDs, check.DeepEquals, expectedGTIDs) @@ -677,7 +647,6 @@ func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { // try recover, truncate the incomplete transaction result, err = w.Recover() c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Recovered, check.IsTrue) c.Assert(result.LatestPos, check.DeepEquals, expectedPos) c.Assert(result.LatestGTIDs, check.DeepEquals, expectedGTIDs) @@ -709,7 +678,6 @@ func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { c.Assert(err, check.IsNil) result, err = w.Recover() c.Assert(err, check.IsNil) - c.Assert(result, check.NotNil) c.Assert(result.Recovered, check.IsFalse) c.Assert(result.LatestPos, check.DeepEquals, expectedPos) c.Assert(result.LatestGTIDs, check.DeepEquals, expectedGTIDs) diff --git a/relay/writer/writer.go b/relay/writer/writer.go index 8ef598706f..5e09feca1f 100644 --- a/relay/writer/writer.go +++ b/relay/writer/writer.go @@ -54,11 +54,11 @@ type Writer interface { // It is often used to recover a binlog file with some corrupt/incomplete binlog events/transactions at the end of the file. // It is not safe for concurrent use by multiple goroutines. // It should be called before writing to the file. - Recover() (*RecoverResult, error) + Recover() (RecoverResult, error) // WriteEvent writes an binlog event's data into disk or any other places. // It is not safe for concurrent use by multiple goroutines. - WriteEvent(ev *replication.BinlogEvent) (*Result, error) + WriteEvent(ev *replication.BinlogEvent) (Result, error) // Flush flushes the buffered data to a stable storage or sends through the network. // It is not safe for concurrent use by multiple goroutines. From a18e3954fbcf81ef5ad3dea9fb2e62a6203b9a9c Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 19 Jun 2019 17:33:50 +0800 Subject: [PATCH 17/24] *: recover relay log file before starting to read binlog events from upstream --- relay/relay.go | 55 ++++++++++++++- relay/relay_test.go | 137 ++++++++++++++++++++++++++++++++++++++ relay/writer/file.go | 11 +-- relay/writer/file_test.go | 32 ++++++++- 4 files changed, 228 insertions(+), 7 deletions(-) diff --git a/relay/relay.go b/relay/relay.go index 9d3d81eb0a..7e496653cb 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -242,9 +242,13 @@ func (r *Relay) process(parentCtx context.Context) error { } } else { r.updateMetricsRelaySubDirIndex() + // if not a new server, try to recover the latest relay log file. + err = r.tryRecoverLatestFile(parser2) + if err != nil { + return errors.Trace(err) + } } - // TODO: do recover before reading from upstream. reader2, err := r.setUpReader() if err != nil { return errors.Trace(err) @@ -274,6 +278,55 @@ func (r *Relay) process(parentCtx context.Context) error { return errors.Trace(r.handleEvents(parentCtx, reader2, transformer2, writer2)) } +// tryRecoverLatestFile tries to recover latest relay log file with corrupt/incomplete binlog events/transactions. +func (r *Relay) tryRecoverLatestFile(parser2 *parser.Parser) error { + var ( + uuid, latestPos = r.meta.Pos() + _, latestGTID = r.meta.GTID() + ) + + if r.cfg.EnableGTID && latestPos.Compare(minCheckpoint) <= 0 { + log.Warnf("[relay] no corresponding position specified for GTID sets %s, skip recovering", latestGTID) + return nil + } + + // setup a special writer to do the recovering + cfg := &writer.FileConfig{ + RelayDir: r.meta.Dir(), + Filename: latestPos.Name, + } + writer2 := writer.NewFileWriter(cfg, parser2) + err := writer2.Start() + if err != nil { + return errors.Annotatef(err, "start recover writer for UUID %s with config %+v", uuid, cfg) + } + defer func() { + err2 := writer2.Close() + if err2 != nil { + log.Errorf("[relay] close recover writer for UUID %s with config %+v error %v", uuid, cfg, err2) + } + }() + log.Infof("[relay] started recover writer for UUID %s with config %+v", uuid, cfg) + + result, err := writer2.Recover() + if err == nil { + if result.Recovered { + log.Warnf("[relay] relay log file recovered from position %s to %s, GTID sets %v to %v", + latestPos, result.LatestPos, latestGTID, result.LatestGTIDs) + err = r.meta.Save(result.LatestPos, result.LatestGTIDs) + if err != nil { + return errors.Annotate(err, "save meta after recovered") + } + } else if result.LatestPos.Compare(latestPos) > 0 || + (result.LatestGTIDs != nil && result.LatestGTIDs.Contain(latestGTID)) { + log.Warnf("[relay] relay log file have more events after position %s (until %s), GTID sets %v (until %v)", + latestPos, result.LatestPos, latestGTID, result.LatestGTIDs) + } + + } + return errors.Annotatef(err, "recover for UUID %s with config %+v", uuid, cfg) +} + // handleEvents handles binlog events, including: // 1. read events from upstream // 2. transform events diff --git a/relay/relay_test.go b/relay/relay_test.go index 5d09924f9e..4044066fbd 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -14,6 +14,7 @@ package relay import ( + "bytes" "context" "database/sql" "fmt" @@ -34,6 +35,7 @@ import ( "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/pkg/binlog" "github.com/pingcap/dm/pkg/binlog/event" + "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/utils" "github.com/pingcap/dm/relay/reader" "github.com/pingcap/dm/relay/transformer" @@ -129,6 +131,141 @@ func (w *mockWriter) Flush() error { return nil } +func (t *testRelaySuite) TestTryRecoverLatestFile(c *C) { + var ( + uuid = "24ecd093-8cec-11e9-aa0d-0242ac170002" + uuidWithSuffix = fmt.Sprintf("%s.000001", uuid) + previousGTIDSetStr = "3ccc475b-2343-11e7-be21-6c0b84d59f30:1-14,53bfca22-690d-11e7-8a62-18ded7a37b78:1-495,406a3f61-690d-11e7-87c5-6c92bf46f384:123-456" + latestGTIDStr1 = "3ccc475b-2343-11e7-be21-6c0b84d59f30:14" + latestGTIDStr2 = "53bfca22-690d-11e7-8a62-18ded7a37b78:495" + filename = "mysql-bin.000001" + startPos = gmysql.Position{Name: filename, Pos: 123} + + parser2 = parser.New() + relayCfg = &Config{ + RelayDir: c.MkDir(), + Flavor: gmysql.MySQLFlavor, + } + r = NewRelay(relayCfg).(*Relay) + ) + c.Assert(r.meta.Load(), IsNil) + + // no file specified, no need to recover + c.Assert(r.tryRecoverLatestFile(parser2), IsNil) + + // save position into meta + c.Assert(r.meta.AddDir(uuid, &startPos, nil), IsNil) + + // relay log file does not exists, no need to recover + c.Assert(r.tryRecoverLatestFile(parser2), IsNil) + + // use a generator to generate some binlog events + previousGTIDSet, err := gtid.ParserGTID(relayCfg.Flavor, previousGTIDSetStr) + c.Assert(err, IsNil) + latestGTID1, err := gtid.ParserGTID(relayCfg.Flavor, latestGTIDStr1) + c.Assert(err, IsNil) + latestGTID2, err := gtid.ParserGTID(relayCfg.Flavor, latestGTIDStr2) + c.Assert(err, IsNil) + g, _, data := genBinlogEventsWithGTIDs(c, relayCfg.Flavor, previousGTIDSet, latestGTID1, latestGTID2) + + // write events into relay log file + err = ioutil.WriteFile(filepath.Join(r.meta.Dir(), filename), data, 0600) + c.Assert(err, IsNil) + + // all events/transactions are complete, no need to recover + c.Assert(r.tryRecoverLatestFile(parser2), IsNil) + // now, we do not update position/GTID set in meta if not recovered + t.verifyMetadata(c, r, uuidWithSuffix, startPos, "", []string{uuidWithSuffix}) + + // write some invalid data into the relay log file + f, err := os.OpenFile(filepath.Join(r.meta.Dir(), filename), os.O_WRONLY|os.O_APPEND, 0600) + c.Assert(err, IsNil) + defer f.Close() + _, err = f.Write([]byte("invalid event data")) + c.Assert(err, IsNil) + + // invalid data truncated, meta updated + c.Assert(r.tryRecoverLatestFile(parser2), IsNil) + _, latestPos := r.meta.Pos() + c.Assert(latestPos, DeepEquals, gmysql.Position{Name: filename, Pos: g.LatestPos}) + _, latestGTIDs := r.meta.GTID() + c.Assert(latestGTIDs.Contain(g.LatestGTID), IsTrue) // verifyMetadata is not enough + + // in GTID mode and without filename specified, we can not do real recovering now. + c.Assert(r.meta.Save(minCheckpoint, latestGTIDs), IsNil) + r.cfg.EnableGTID = true + c.Assert(r.tryRecoverLatestFile(parser2), IsNil) + _, latestPos = r.meta.Pos() + c.Assert(latestPos, DeepEquals, minCheckpoint) + _, latestGTIDs = r.meta.GTID() + c.Assert(latestGTIDs.Contain(g.LatestGTID), IsTrue) +} + +// genBinlogEventsWithGTIDs generates some binlog events used by testFileUtilSuite and testFileWriterSuite. +// now, its generated events including 3 DDL and 10 DML. +func genBinlogEventsWithGTIDs(c *C, flavor string, previousGTIDSet, latestGTID1, latestGTID2 gtid.Set) (*event.Generator, []*replication.BinlogEvent, []byte) { + var ( + serverID uint32 = 11 + latestPos uint32 + latestXID uint64 = 10 + + allEvents = make([]*replication.BinlogEvent, 0, 50) + allData bytes.Buffer + ) + + // use a binlog event generator to generate some binlog events. + g, err := event.NewGenerator(flavor, serverID, latestPos, latestGTID1, previousGTIDSet, latestXID) + c.Assert(err, IsNil) + + // file header with FormatDescriptionEvent and PreviousGTIDsEvent + events, data, err := g.GenFileHeader() + c.Assert(err, IsNil) + allEvents = append(allEvents, events...) + allData.Write(data) + + // CREATE DATABASE/TABLE, 3 DDL + queries := []string{ + "CREATE DATABASE `db`", + "CREATE TABLE `db`.`tbl1` (c1 INT)", + "CREATE TABLE `db`.`tbl2` (c1 INT)", + } + for _, query := range queries { + events, data, err = g.GenDDLEvents("db", query) + c.Assert(err, IsNil) + allEvents = append(allEvents, events...) + allData.Write(data) + } + + // DMLs, 10 DML + g.LatestGTID = latestGTID2 // use another latest GTID with different SID/DomainID + var ( + tableID uint64 = 8 + columnType = []byte{gmysql.MYSQL_TYPE_LONG} + eventType = replication.WRITE_ROWS_EVENTv2 + schema = "db" + table = "tbl1" + ) + for i := 0; i < 10; i++ { + insertRows := make([][]interface{}, 0, 1) + insertRows = append(insertRows, []interface{}{int32(i)}) + dmlData := []*event.DMLData{ + { + TableID: tableID, + Schema: schema, + Table: table, + ColumnType: columnType, + Rows: insertRows, + }, + } + events, data, err = g.GenDMLEvents(eventType, dmlData) + c.Assert(err, IsNil) + allEvents = append(allEvents, events...) + allData.Write(data) + } + + return g, allEvents, allData.Bytes() +} + func (t *testRelaySuite) TestHandleEvent(c *C) { // NOTE: we can test metrics later. var ( diff --git a/relay/writer/file.go b/relay/writer/file.go index 1308fd1d92..2d648267f3 100644 --- a/relay/writer/file.go +++ b/relay/writer/file.go @@ -378,6 +378,13 @@ func (w *FileWriter) handleDuplicateEventsExist(ev *replication.BinlogEvent) (Re // NOTE: handle cases when file size > 4GB func (w *FileWriter) doRecovering() (RecoverResult, error) { filename := filepath.Join(w.cfg.RelayDir, w.filename.Get()) + fs, err := os.Stat(filename) + if (err != nil && os.IsNotExist(err)) || (err == nil && len(w.filename.Get()) == 0) { + return RecoverResult{}, nil // no file need to recover + } else if err != nil { + return RecoverResult{}, errors.Annotatef(err, "get stat for %s", filename) + } + // get latest pos/GTID set for all completed transactions from the file latestPos, latestGTIDs, err := getTxnPosGTIDs(filename, w.parser) if err != nil { @@ -385,10 +392,6 @@ func (w *FileWriter) doRecovering() (RecoverResult, error) { } // in most cases, we think the file is fine, so compare the size is simpler. - fs, err := os.Stat(filename) - if err != nil { - return RecoverResult{}, errors.Annotatef(err, "get stat for %s", filename) - } if fs.Size() == latestPos { return RecoverResult{ Recovered: false, // no recovering for the file diff --git a/relay/writer/file_test.go b/relay/writer/file_test.go index 859f75fb9f..2930fd58e7 100644 --- a/relay/writer/file_test.go +++ b/relay/writer/file_test.go @@ -84,8 +84,9 @@ func (t *testFileWriterSuite) TestInterfaceMethods(c *check.C) { c.Assert(err, check.ErrorMatches, ".*no underlying writer opened.*") // recover - _, err = w.Recover() - c.Assert(err, check.ErrorMatches, ".*no such file or directory.*") + rres, err := w.Recover() + c.Assert(err, check.IsNil) + c.Assert(rres.Recovered, check.IsFalse) // write event res, err := w.WriteEvent(ev) @@ -690,3 +691,30 @@ func (t *testFileWriterSuite) TestRecoverMySQL(c *check.C) { c.Assert(err, check.IsNil) c.Assert(fileData, check.DeepEquals, allData.Bytes()) } + +func (t *testFileWriterSuite) TestRecoverMySQLNone(c *check.C) { + var ( + cfg = &FileConfig{ + RelayDir: c.MkDir(), + } + ) + + w1 := NewFileWriter(cfg, t.parser) + defer w1.Close() + c.Assert(w1.Start(), check.IsNil) + + // no file specified to recover + result, err := w1.Recover() + c.Assert(err, check.IsNil) + c.Assert(result.Recovered, check.IsFalse) + + cfg.Filename = "mysql-bin.000001" + w2 := NewFileWriter(cfg, t.parser) + defer w2.Close() + c.Assert(w2.Start(), check.IsNil) + + // file not exist, no need to recover + result, err = w2.Recover() + c.Assert(err, check.IsNil) + c.Assert(result.Recovered, check.IsFalse) +} From 6c3fab5c440efc069bd43a66c762a980909b9c2a Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Fri, 21 Jun 2019 12:03:59 +0800 Subject: [PATCH 18/24] *: address comments, add a `WaitSomething` function --- dm/worker/relay_test.go | 4 +++- dm/worker/server_test.go | 20 +++++--------------- pkg/binlog/reader/tcp_test.go | 12 ++++++------ pkg/utils/util.go | 14 ++++++++++++++ pkg/utils/util_test.go | 31 +++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/dm/worker/relay_test.go b/dm/worker/relay_test.go index fbfdc323e9..2abc207112 100644 --- a/dm/worker/relay_test.go +++ b/dm/worker/relay_test.go @@ -15,6 +15,7 @@ package worker import ( "context" + "time" . "github.com/pingcap/check" "github.com/pingcap/errors" @@ -22,6 +23,7 @@ import ( "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" pkgstreamer "github.com/pingcap/dm/pkg/streamer" + "github.com/pingcap/dm/pkg/utils" "github.com/pingcap/dm/relay" "github.com/pingcap/dm/relay/purger" ) @@ -306,7 +308,7 @@ func (t *testRelay) testStop(c *C, holder *realRelayHolder) { } func waitRelayStage(holder *realRelayHolder, expect pb.Stage, backoff int) bool { - return waitSomething(backoff, func() bool { + return utils.WaitSomething(backoff, 10*time.Millisecond, func() bool { return holder.Stage() == expect }) } diff --git a/dm/worker/server_test.go b/dm/worker/server_test.go index d3abb6621b..5dead67dce 100644 --- a/dm/worker/server_test.go +++ b/dm/worker/server_test.go @@ -21,8 +21,10 @@ import ( "time" . "github.com/pingcap/check" - "github.com/pingcap/dm/dm/pb" "google.golang.org/grpc" + + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/utils" ) func TestServer(t *testing.T) { @@ -53,7 +55,7 @@ func (t *testServer) TestServer(c *C) { c.Assert(err1, IsNil) }() - c.Assert(waitSomething(30, func() bool { + c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { return !s.closed.Get() }), IsTrue) @@ -124,7 +126,7 @@ func (t *testServer) TestServer(c *C) { // close s.Close() - c.Assert(waitSomething(10, func() bool { + c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { return s.closed.Get() }), IsTrue) @@ -146,15 +148,3 @@ func (t *testServer) createClient(c *C) pb.WorkerClient { c.Assert(err, IsNil) return pb.NewWorkerClient(conn) } - -func waitSomething(backoff int, fn func() bool) bool { - for i := 0; i < backoff; i++ { - if fn() { - return true - } - - time.Sleep(10 * time.Millisecond) - } - - return false -} diff --git a/pkg/binlog/reader/tcp_test.go b/pkg/binlog/reader/tcp_test.go index aab1d9e624..0a1d47b281 100644 --- a/pkg/binlog/reader/tcp_test.go +++ b/pkg/binlog/reader/tcp_test.go @@ -90,23 +90,23 @@ func (t *testTCPReaderSuite) setUpData(c *C) { query := fmt.Sprintf("DROP DATABASE `%s`", dbName) _, err := t.db.Exec(query) - maxRetryCount := 5 - for i := 0; i < maxRetryCount; i++ { + backoff := 5 + waitTime := 5 * time.Second + waitFn := func() bool { // delete previous binlog files/events. if other test cases writing events, they may be failed. query = "RESET MASTER" _, err = t.db.Exec(query) c.Assert(err, IsNil) - // check whether other test cases have wrote any events. time.Sleep(time.Second) _, gs, err2 := utils.GetMasterStatus(t.db, flavor) c.Assert(err2, IsNil) if len(gs.String()) > 0 { - time.Sleep(5 * time.Second) // some events exist now, try again later. - } else { - break + return false // some events exist now, try again later. } + return true } + utils.WaitSomething(backoff, waitTime, waitFn) // execute some SQL statements to generate binlog events. query = fmt.Sprintf("CREATE DATABASE `%s`", dbName) diff --git a/pkg/utils/util.go b/pkg/utils/util.go index 60b2c8531a..c3a90fc09f 100644 --- a/pkg/utils/util.go +++ b/pkg/utils/util.go @@ -18,6 +18,7 @@ import ( "os" "strconv" "strings" + "time" "github.com/pingcap/errors" "github.com/siddontang/go-mysql/mysql" @@ -76,3 +77,16 @@ func CompareBinlogPos(a, b mysql.Position, deviation float64) int { return 1 } + +// WaitSomething waits for something done with `true`. +func WaitSomething(backoff int, waitTime time.Duration, fn func() bool) bool { + for i := 0; i < backoff; i++ { + if fn() { + return true + } + + time.Sleep(waitTime) + } + + return false +} diff --git a/pkg/utils/util_test.go b/pkg/utils/util_test.go index 0665856b43..ae73756310 100644 --- a/pkg/utils/util_test.go +++ b/pkg/utils/util_test.go @@ -14,6 +14,8 @@ package utils import ( + "time" + . "github.com/pingcap/check" "github.com/siddontang/go-mysql/mysql" ) @@ -142,3 +144,32 @@ func (t *testUtilsSuite) TestCompareBinlogPos(c *C) { } } + +func (t *testUtilsSuite) TestWaitSomething(c *C) { + var ( + backoff = 10 + waitTime = 10 * time.Millisecond + count = 0 + ) + + // wait fail + f1 := func() bool { + count++ + return false + } + c.Assert(WaitSomething(backoff, waitTime, f1), IsFalse) + c.Assert(count, Equals, backoff) + + count = 0 // reset + // wait success + f2 := func() bool { + count++ + if count >= 5 { + return true + } + return false + } + + c.Assert(WaitSomething(backoff, waitTime, f2), IsTrue) + c.Assert(count, Equals, 5) +} From 201f96028d56a0aaa1c17373381a391bdbe8e75a Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Fri, 21 Jun 2019 15:03:35 +0800 Subject: [PATCH 19/24] relay: address comments --- relay/relay.go | 10 +++++----- relay/relay_test.go | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/relay/relay.go b/relay/relay.go index 7e496653cb..827708e61f 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -285,8 +285,8 @@ func (r *Relay) tryRecoverLatestFile(parser2 *parser.Parser) error { _, latestGTID = r.meta.GTID() ) - if r.cfg.EnableGTID && latestPos.Compare(minCheckpoint) <= 0 { - log.Warnf("[relay] no corresponding position specified for GTID sets %s, skip recovering", latestGTID) + if latestPos.Compare(minCheckpoint) <= 0 { + log.Warnf("[relay] no relay log file need to recover, position %s, GTID sets %v", latestPos, latestGTID) return nil } @@ -315,7 +315,7 @@ func (r *Relay) tryRecoverLatestFile(parser2 *parser.Parser) error { latestPos, result.LatestPos, latestGTID, result.LatestGTIDs) err = r.meta.Save(result.LatestPos, result.LatestGTIDs) if err != nil { - return errors.Annotate(err, "save meta after recovered") + return errors.Annotatef(err, "save position %s, GTID sets %v after recovered", result.LatestPos, result.LatestGTIDs) } } else if result.LatestPos.Compare(latestPos) > 0 || (result.LatestGTIDs != nil && result.LatestGTIDs.Contain(latestGTID)) { @@ -402,7 +402,7 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo lastPos.Pos = tResult.LogPos err = lastGTID.Set(tResult.GTIDSet) if err != nil { - log.Errorf("[relay] update last GTID set to %v error %v", tResult.GTIDSet, err) + return errors.Annotatef(err, "update last GTID set to %v", tResult.GTIDSet) } if !r.cfg.EnableGTID { // if go-mysql set RawModeEnabled to true @@ -427,7 +427,7 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo if needSavePos { err = r.meta.Save(lastPos, lastGTID) if err != nil { - return errors.Trace(err) + return errors.Annotatef(err, "save position %s, GTID sets %v into meta", lastPos, lastGTID) } } } diff --git a/relay/relay_test.go b/relay/relay_test.go index 4044066fbd..e4dcbeb5a0 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -191,9 +191,8 @@ func (t *testRelaySuite) TestTryRecoverLatestFile(c *C) { _, latestGTIDs := r.meta.GTID() c.Assert(latestGTIDs.Contain(g.LatestGTID), IsTrue) // verifyMetadata is not enough - // in GTID mode and without filename specified, we can not do real recovering now. + // no relay log file need to recover c.Assert(r.meta.Save(minCheckpoint, latestGTIDs), IsNil) - r.cfg.EnableGTID = true c.Assert(r.tryRecoverLatestFile(parser2), IsNil) _, latestPos = r.meta.Pos() c.Assert(latestPos, DeepEquals, minCheckpoint) From 2e8ed95bdf2068df4cb2088fa68e6dd9b243737a Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Fri, 21 Jun 2019 16:08:51 +0800 Subject: [PATCH 20/24] relay: check GTID sets `Equal` before `Contain` --- relay/relay.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/relay.go b/relay/relay.go index 827708e61f..87f027c9cd 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -318,7 +318,7 @@ func (r *Relay) tryRecoverLatestFile(parser2 *parser.Parser) error { return errors.Annotatef(err, "save position %s, GTID sets %v after recovered", result.LatestPos, result.LatestGTIDs) } } else if result.LatestPos.Compare(latestPos) > 0 || - (result.LatestGTIDs != nil && result.LatestGTIDs.Contain(latestGTID)) { + (result.LatestGTIDs != nil && !result.LatestGTIDs.Equal(latestGTID) && result.LatestGTIDs.Contain(latestGTID)) { log.Warnf("[relay] relay log file have more events after position %s (until %s), GTID sets %v (until %v)", latestPos, result.LatestPos, latestGTID, result.LatestGTIDs) } From 071bc72611e21e93629c04f28fb439ccd9cb4f38 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Fri, 21 Jun 2019 18:18:47 +0800 Subject: [PATCH 21/24] *: address comments --- relay/reader/error.go | 10 ---------- relay/reader/error_test.go | 13 ------------- relay/reader/reader.go | 24 +++++++++++++---------- relay/reader/reader_test.go | 21 ++++++++------------ relay/relay.go | 17 +++------------- relay/relay_test.go | 39 ++++++++----------------------------- 6 files changed, 33 insertions(+), 91 deletions(-) diff --git a/relay/reader/error.go b/relay/reader/error.go index 40d38c715a..db00892d18 100644 --- a/relay/reader/error.go +++ b/relay/reader/error.go @@ -19,16 +19,6 @@ import ( "github.com/pingcap/errors" ) -// isIgnorableError checks whether the error is ignorable. -func isIgnorableError(err error) bool { - err = errors.Cause(err) - switch err { - case context.Canceled: - return true - } - return false -} - // isRetryableError checks whether the error is retryable. func isRetryableError(err error) bool { err = errors.Cause(err) diff --git a/relay/reader/error_test.go b/relay/reader/error_test.go index 095bc3a297..f7eef0e287 100644 --- a/relay/reader/error_test.go +++ b/relay/reader/error_test.go @@ -27,19 +27,6 @@ var ( type testErrorSuite struct { } -func (t *testErrorSuite) TestIgnorable(c *check.C) { - err := errors.New("custom error") - c.Assert(isIgnorableError(err), check.IsFalse) - - cases := []error{ - context.Canceled, - errors.Annotate(context.Canceled, "annotated"), - } - for _, cs := range cases { - c.Assert(isIgnorableError(cs), check.IsTrue) - } -} - func (t *testErrorSuite) TestRetryable(c *check.C) { err := errors.New("custom error") c.Assert(isRetryableError(err), check.IsFalse) diff --git a/relay/reader/reader.go b/relay/reader/reader.go index 34bf3cf2f7..a39fbb8151 100644 --- a/relay/reader/reader.go +++ b/relay/reader/reader.go @@ -16,6 +16,7 @@ package reader import ( "context" "sync" + "time" "github.com/pingcap/errors" "github.com/siddontang/go-mysql/mysql" @@ -27,11 +28,14 @@ import ( "github.com/pingcap/dm/pkg/log" ) +const ( + // event timeout when trying to read events from upstream master server. + eventTimeout = 1 * time.Hour +) + // Result represents a read operation result. type Result struct { - Event *replication.BinlogEvent - ErrIgnorable bool // the error is ignorable - ErrRetryable bool // the error is retryable + Event *replication.BinlogEvent } // Reader reads binlog events from a upstream master server. @@ -48,7 +52,7 @@ type Reader interface { Close() error // GetEvent gets the binlog event one by one, it will block if no event can be read. - // You can pass a context (like Cancel or Timeout) to break the block. + // You can pass a context (like Cancel) to break the block. GetEvent(ctx context.Context) (Result, error) } @@ -121,7 +125,6 @@ func (r *reader) Close() error { } // GetEvent implements Reader.GetEvent. -// If some ignorable error occurred, the returned event and error both are nil. // NOTE: can only close the reader after this returned. func (r *reader) GetEvent(ctx context.Context) (Result, error) { r.mu.RLock() @@ -133,14 +136,15 @@ func (r *reader) GetEvent(ctx context.Context) (Result, error) { } for { - ev, err := r.in.GetEvent(ctx) - // NOTE: add retryable error support if needed later + ctx2, cancel2 := context.WithTimeout(ctx, eventTimeout) + ev, err := r.in.GetEvent(ctx2) + cancel2() + if err == nil { result.Event = ev - } else if isIgnorableError(err) { - result.ErrIgnorable = true } else if isRetryableError(err) { - result.ErrRetryable = true + log.Infof("[relay] get retryable error %v when reading binlog event", err) + continue } return result, errors.Trace(err) } diff --git a/relay/reader/reader_test.go b/relay/reader/reader_test.go index 4ea5e246dc..9502786d0f 100644 --- a/relay/reader/reader_test.go +++ b/relay/reader/reader_test.go @@ -122,18 +122,13 @@ func (t *testReaderSuite) TestGetEventWithError(c *check.C) { errOther := errors.New("other error") in := []error{ - context.Canceled, // ignorable - context.DeadlineExceeded, // retryable + context.Canceled, + context.DeadlineExceeded, // retried without return errOther, } - expected := []Result{ - { - ErrIgnorable: true, - }, - { - ErrRetryable: true, - }, - {}, + expected := []error{ + context.Canceled, + errOther, } err := r.Start() @@ -149,11 +144,11 @@ func (t *testReaderSuite) TestGetEventWithError(c *check.C) { } }() - results := make([]Result, 0, len(expected)) + results := make([]error, 0, len(expected)) for { - result, err2 := r.GetEvent(ctx) + _, err2 := r.GetEvent(ctx) c.Assert(err2, check.NotNil) - results = append(results, result) + results = append(results, errors.Cause(err2)) if err2 == errOther { break // all received } diff --git a/relay/relay.go b/relay/relay.go index 87f027c9cd..1aa0056c20 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -42,20 +42,16 @@ import ( ) var ( - // ErrNoIncompleteEventFound represents no incomplete event found in relay log file - ErrNoIncompleteEventFound = errors.New("no incomplete event found in relay log file") // used to fill RelayLogInfo fakeTaskName = "relay" ) const ( - eventTimeout = 1 * time.Hour slaveReadTimeout = 1 * time.Minute // slave read binlog data timeout, ref: https://dev.mysql.com/doc/refman/8.0/en/replication-options-slave.html#sysvar_slave_net_timeout masterHeartbeatPeriod = 30 * time.Second // master server send heartbeat period: ref: `MASTER_HEARTBEAT_PERIOD` in https://dev.mysql.com/doc/refman/8.0/en/change-master-to.html flushMetaInterval = 30 * time.Second getMasterStatusInterval = 30 * time.Second trimUUIDsInterval = 1 * time.Hour - binlogHeaderSize = 4 showStatusConnectionTimeout = "1m" // dumpFlagSendAnnotateRowsEvent (BINLOG_SEND_ANNOTATE_ROWS_EVENT) request the MariaDB master to send Annotate_rows_log_event back. @@ -340,21 +336,14 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo for { // 1. reader events from upstream server - ctx2, cancel2 := context.WithTimeout(ctx, eventTimeout) readTimer := time.Now() - rResult, err := reader2.GetEvent(ctx2) - cancel2() + rResult, err := reader2.GetEvent(ctx) binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds()) if err != nil { - if rResult.ErrIgnorable { - log.Infof("[relay] get ignorable error %v when reading binlog event", err) + switch errors.Cause(err) { + case context.Canceled: return nil - } else if rResult.ErrRetryable { - log.Infof("[relay] get retryable error %v when reading binlog event", err) - continue - } - switch err { case replication.ErrChecksumMismatch: relayLogDataCorruptionCounter.Inc() case replication.ErrSyncClosed, replication.ErrNeedSyncAgain: diff --git a/relay/relay_test.go b/relay/relay_test.go index e4dcbeb5a0..45e48225a2 100644 --- a/relay/relay_test.go +++ b/relay/relay_test.go @@ -97,7 +97,7 @@ func (r *mockReader) Close() error { func (r *mockReader) GetEvent(ctx context.Context) (reader.Result, error) { select { case <-ctx.Done(): - return reader.Result{ErrIgnorable: true}, ctx.Err() + return reader.Result{}, ctx.Err() default: } return r.result, r.err @@ -296,23 +296,14 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) defer cancel() - // reader return with ignorable error - reader2.result.ErrIgnorable = true - reader2.err = errors.New("reader error for testing") - // return with `nil` - err := r.handleEvents(ctx, reader2, transformer2, writer2) - c.Assert(err, IsNil) - - // reader return with non-ignorable error - reader2.result.ErrIgnorable = false - // return with the annotated reader error + // reader return with an error for _, reader2.err = range []error{ errors.New("reader error for testing"), replication.ErrChecksumMismatch, replication.ErrSyncClosed, replication.ErrNeedSyncAgain, } { - err = r.handleEvents(ctx, reader2, transformer2, writer2) + err := r.handleEvents(ctx, reader2, transformer2, writer2) c.Assert(errors.Cause(err), Equals, reader2.err) } @@ -323,13 +314,13 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { // writer return error writer2.err = errors.New("writer error for testing") // return with the annotated writer error - err = r.handleEvents(ctx, reader2, transformer2, writer2) + err := r.handleEvents(ctx, reader2, transformer2, writer2) c.Assert(errors.Cause(err), Equals, writer2.err) // writer without error writer2.err = nil err = r.handleEvents(ctx, reader2, transformer2, writer2) // returned when ctx timeout - c.Assert(err, IsNil) + c.Assert(errors.Cause(err), Equals, ctx.Err()) // check written event c.Assert(writer2.latestEvent, Equals, reader2.result.Event) // check meta @@ -344,7 +335,7 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { // write a QueryEvent with GTID sets reader2.result.Event = queryEv err = r.handleEvents(ctx2, reader2, transformer2, writer2) - c.Assert(err, IsNil) + c.Assert(errors.Cause(err), Equals, ctx.Err()) // check written event c.Assert(writer2.latestEvent, Equals, reader2.result.Event) // check meta @@ -354,21 +345,7 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { c.Assert(pos.Pos, Equals, queryEv.Header.LogPos) c.Assert(gs.Origin(), DeepEquals, queryEv2.GSet) // got GTID sets - // reader return retryable error - reader2.result.ErrRetryable = true - reader2.err = errors.New("reader error for testing") - ctx3, cancel3 := context.WithTimeout(context.Background(), 10*time.Millisecond) - defer cancel3() - err = r.handleEvents(ctx3, reader2, transformer2, writer2) - c.Assert(err, IsNil) - select { - case <-ctx3.Done(): - default: - c.Fatalf("retryable error for reader not retried") - } - // transformer return ignorable for the event - reader2.result.ErrIgnorable = false reader2.err = nil reader2.result.Event = &replication.BinlogEvent{ Header: &replication.EventHeader{EventType: replication.HEARTBEAT_EVENT}, @@ -376,7 +353,7 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { ctx4, cancel4 := context.WithTimeout(context.Background(), 10*time.Millisecond) defer cancel4() err = r.handleEvents(ctx4, reader2, transformer2, writer2) - c.Assert(err, IsNil) + c.Assert(errors.Cause(err), Equals, ctx.Err()) select { case <-ctx4.Done(): default: @@ -389,7 +366,7 @@ func (t *testRelaySuite) TestHandleEvent(c *C) { ctx5, cancel5 := context.WithTimeout(context.Background(), 10*time.Millisecond) defer cancel5() err = r.handleEvents(ctx5, reader2, transformer2, writer2) - c.Assert(err, IsNil) + c.Assert(errors.Cause(err), Equals, ctx.Err()) select { case <-ctx5.Done(): default: From 0527d1660587a7f35404197d29bfc2650a9d561d Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Sat, 22 Jun 2019 18:19:25 +0800 Subject: [PATCH 22/24] reader: address comment, change `eventTimeout` --- relay/reader/reader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/reader/reader.go b/relay/reader/reader.go index a39fbb8151..4106c3baf7 100644 --- a/relay/reader/reader.go +++ b/relay/reader/reader.go @@ -30,7 +30,7 @@ import ( const ( // event timeout when trying to read events from upstream master server. - eventTimeout = 1 * time.Hour + eventTimeout = 10 * time.Minute ) // Result represents a read operation result. From 184d1b8981dd9b3840af987d1183f06227bd75f1 Mon Sep 17 00:00:00 2001 From: Xuecheng Zhang Date: Wed, 26 Jun 2019 15:14:57 +0800 Subject: [PATCH 23/24] Update relay/relay.go Co-Authored-By: amyangfei --- relay/relay.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/relay.go b/relay/relay.go index 1aa0056c20..58cf9551d9 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -335,7 +335,7 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo ) for { - // 1. reader events from upstream server + // 1. read events from upstream server readTimer := time.Now() rResult, err := reader2.GetEvent(ctx) binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds()) From f9b1663d8a755052f0307248dd1f9a62ead84dc5 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 26 Jun 2019 17:10:42 +0800 Subject: [PATCH 24/24] *: address comments --- dm/worker/relay_test.go | 6 ++---- pkg/binlog/reader/tcp_test.go | 5 +---- pkg/utils/util_test.go | 5 +---- relay/relay.go | 15 +++++++++++++++ 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/dm/worker/relay_test.go b/dm/worker/relay_test.go index 2abc207112..6fc5903398 100644 --- a/dm/worker/relay_test.go +++ b/dm/worker/relay_test.go @@ -60,10 +60,8 @@ func (d *DummyRelay) InjectInitError(err error) { // Process implements Process interface func (d *DummyRelay) Process(ctx context.Context, pr chan pb.ProcessResult) { - select { - case <-ctx.Done(): - pr <- d.processResult - } + <-ctx.Done() + pr <- d.processResult } // InjectProcessResult injects process result diff --git a/pkg/binlog/reader/tcp_test.go b/pkg/binlog/reader/tcp_test.go index 0a1d47b281..a4cdd8057f 100644 --- a/pkg/binlog/reader/tcp_test.go +++ b/pkg/binlog/reader/tcp_test.go @@ -101,10 +101,7 @@ func (t *testTCPReaderSuite) setUpData(c *C) { time.Sleep(time.Second) _, gs, err2 := utils.GetMasterStatus(t.db, flavor) c.Assert(err2, IsNil) - if len(gs.String()) > 0 { - return false // some events exist now, try again later. - } - return true + return gs.String() == "" // break waiting if no other case wrote any events } utils.WaitSomething(backoff, waitTime, waitFn) diff --git a/pkg/utils/util_test.go b/pkg/utils/util_test.go index ae73756310..75d154dc05 100644 --- a/pkg/utils/util_test.go +++ b/pkg/utils/util_test.go @@ -164,10 +164,7 @@ func (t *testUtilsSuite) TestWaitSomething(c *C) { // wait success f2 := func() bool { count++ - if count >= 5 { - return true - } - return false + return count >= 5 } c.Assert(WaitSomething(backoff, waitTime, f2), IsTrue) diff --git a/relay/relay.go b/relay/relay.go index 58cf9551d9..cc1c81ef3e 100644 --- a/relay/relay.go +++ b/relay/relay.go @@ -351,6 +351,9 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo default: if utils.IsErrBinlogPurged(err) { // TODO: try auto fix GTID, and can support auto switching between upstream server later. + cfg := r.cfg.From + log.Errorf("[relay] the requested binlog files have purged in the master server or the master server behind %s:%d have switched, currently DM do no support to handle this error %v", + cfg.Host, cfg.Port, err) } binlogReadErrorCounter.Inc() } @@ -382,9 +385,11 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo return errors.Trace(err) } else if wResult.Ignore { log.Infof("[relay] ignore event %+v by writer", e.Header) + r.tryUpdateActiveRelayLog(e, lastPos.Name) // even the event ignored we still need to try this update. continue } relayLogWriteDurationHistogram.Observe(time.Since(writeTimer).Seconds()) + r.tryUpdateActiveRelayLog(e, lastPos.Name) // wrote a event, try update the current active relay log. // 4. update meta and metrics needSavePos := tResult.CanSaveGTID @@ -422,6 +427,16 @@ func (r *Relay) handleEvents(ctx context.Context, reader2 reader.Reader, transfo } } +// tryUpdateActiveRelayLog tries to update current active relay log file. +// we should to update after received/wrote a FormatDescriptionEvent because it means switched to a new relay log file. +// NOTE: we can refactor active (writer/read) relay log mechanism later. +func (r *Relay) tryUpdateActiveRelayLog(e *replication.BinlogEvent, filename string) { + if e.Header.EventType == replication.FORMAT_DESCRIPTION_EVENT { + r.setActiveRelayLog(filename) + log.Infof("[relay] the active relay log file change to %s", filename) + } +} + // reSetupMeta re-setup the metadata when switching to a new upstream master server. func (r *Relay) reSetupMeta() error { uuid, err := utils.GetServerUUID(r.db, r.cfg.Flavor)