Skip to content

Commit

Permalink
dm/worker: don't exit when failed to read checkpoint in relay (pingca…
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored and 3AceShowHand committed Jan 13, 2022
1 parent d358c28 commit 05802b3
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
1 change: 1 addition & 0 deletions dm/dm/worker/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ func NewServer(cfg *Config) *Server {
}

// Start starts serving.
// This function should only exit when it can't dial DM-master; for other errors it should not exit.
func (s *Server) Start() error {
log.L().Info("starting dm-worker server")
RegistryMetrics()
Expand Down
4 changes: 2 additions & 2 deletions dm/dm/worker/source_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,11 @@ func (w *SourceWorker) EnableRelay() (err error) {
defer dcancel()
minLoc, err1 := getMinLocInAllSubTasks(dctx, subTaskCfgs)
if err1 != nil {
return err1
w.l.Error("meet error when EnableRelay", zap.Error(err1))
}

if minLoc != nil {
log.L().Info("get min location in all subtasks", zap.Stringer("location", *minLoc))
w.l.Info("get min location in all subtasks", zap.Stringer("location", *minLoc))
w.cfg.RelayBinLogName = binlog.AdjustPosition(minLoc.Position).Name
w.cfg.RelayBinlogGTID = minLoc.GTIDSetStr()
// set UUIDSuffix when bound to a source
Expand Down
41 changes: 41 additions & 0 deletions dm/tests/new_relay/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,46 @@ function test_cant_dail_upstream() {
cleanup_data $TEST_NAME
}

# test_cant_dail_downstream checks that a restarted DM-worker keeps running and
# stays queryable while TiDB on 127.0.0.1:4000 is down.
# Flow: bring up DM-master and worker1, create source1, start relay and a task,
# kill both the worker and tidb-server, restart only the worker, then expect
# query-status to report both the relay state and the connection-refused error.
function test_cant_dail_downstream() {
# NOTE(review): presumably resets leftover data/processes from earlier cases — helpers are defined elsewhere.
cleanup_data $TEST_NAME
cleanup_process

# start DM-master and worker1, checking each RPC endpoint after launch
run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml
check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT
run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT

# create the upstream source from a copy of the config placed in the work dir
cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1

# enable relay for the source on worker1, then start the task
# NOTE(review): the trailing "1" is presumably the expected match count of the preceding pattern — confirm against run_dm_ctl_with_retry.
run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
"start-relay -s $SOURCE_ID1 worker1" \
"\"result\": true" 1
dmctl_start_task_standalone $cur/conf/dm-task.yaml "--remove-meta"

kill_dm_worker
# kill tidb: send SIGHUP to tidb-server; stderr is discarded and `|| true`
# keeps the script going when pkill matches no process
pkill -hup tidb-server 2>/dev/null || true
wait_process_exit tidb-server

# restart only the worker; tidb is still down at this point
run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT

# make sure DM-worker doesn't exit: wait a moment, then query it again.
# The status output is expected to contain both the relay catch-up flag and
# the connection-refused error for port 4000.
sleep 2
run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
"query-status -s $SOURCE_ID1" \
"\"relayCatchUpMaster\": true" 1 \
"dial tcp 127.0.0.1:4000: connect: connection refused" 1

# restart tidb on port 4000 and give it a moment before tearing down
run_tidb_server 4000 $TIDB_PASSWORD
sleep 2

cleanup_process
cleanup_data $TEST_NAME
}

function test_kill_dump_connection() {
cleanup_data $TEST_NAME
cleanup_process
Expand Down Expand Up @@ -83,6 +123,7 @@ function test_kill_dump_connection() {
}

function run() {
test_cant_dail_downstream
test_cant_dail_upstream

export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/relay/ReportRelayLogSpaceInBackground=return(1)"
Expand Down

0 comments on commit 05802b3

Please sign in to comment.