diff --git a/dm/dm/worker/server.go b/dm/dm/worker/server.go index cac9331fdf4..b3f9b0ce51e 100644 --- a/dm/dm/worker/server.go +++ b/dm/dm/worker/server.go @@ -89,6 +89,7 @@ func NewServer(cfg *Config) *Server { } // Start starts to serving. +// this function should only exit when it can't dial DM-master; for other errors it should not exit. func (s *Server) Start() error { log.L().Info("starting dm-worker server") RegistryMetrics() diff --git a/dm/dm/worker/source_worker.go b/dm/dm/worker/source_worker.go index 9ea3e9385cc..b9125c5c1b2 100644 --- a/dm/dm/worker/source_worker.go +++ b/dm/dm/worker/source_worker.go @@ -301,11 +301,11 @@ func (w *SourceWorker) EnableRelay() (err error) { defer dcancel() minLoc, err1 := getMinLocInAllSubTasks(dctx, subTaskCfgs) if err1 != nil { - return err1 + w.l.Error("meet error when EnableRelay", zap.Error(err1)) } if minLoc != nil { - log.L().Info("get min location in all subtasks", zap.Stringer("location", *minLoc)) + w.l.Info("get min location in all subtasks", zap.Stringer("location", *minLoc)) w.cfg.RelayBinLogName = binlog.AdjustPosition(minLoc.Position).Name w.cfg.RelayBinlogGTID = minLoc.GTIDSetStr() // set UUIDSuffix when bound to a source diff --git a/dm/tests/new_relay/run.sh b/dm/tests/new_relay/run.sh index 92e6b1053b8..1a30f297e7e 100755 --- a/dm/tests/new_relay/run.sh +++ b/dm/tests/new_relay/run.sh @@ -43,6 +43,46 @@ function test_cant_dail_upstream() { cleanup_data $TEST_NAME } +function test_cant_dail_downstream() { + cleanup_data $TEST_NAME + cleanup_process + + run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml + check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT + run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml + check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT + + cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml + dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1 + + run_dm_ctl_with_retry $WORK_DIR 
"127.0.0.1:$MASTER_PORT" \ + "start-relay -s $SOURCE_ID1 worker1" \ + "\"result\": true" 1 + dmctl_start_task_standalone $cur/conf/dm-task.yaml "--remove-meta" + + kill_dm_worker + # kill tidb + pkill -hup tidb-server 2>/dev/null || true + wait_process_exit tidb-server + + run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml + check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT + + # make sure DM-worker doesn't exit + sleep 2 + run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "query-status -s $SOURCE_ID1" \ + "\"relayCatchUpMaster\": true" 1 \ + "dial tcp 127.0.0.1:4000: connect: connection refused" 1 + + # restart tidb + run_tidb_server 4000 $TIDB_PASSWORD + sleep 2 + + cleanup_process + cleanup_data $TEST_NAME +} + function test_kill_dump_connection() { cleanup_data $TEST_NAME cleanup_process @@ -83,6 +123,7 @@ function test_kill_dump_connection() { } function run() { + test_cant_dail_downstream test_cant_dail_upstream export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/relay/ReportRelayLogSpaceInBackground=return(1)"