diff --git a/client/client_test.go b/client/client_test.go index 070dd90800d..581cec41aa1 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -40,7 +40,7 @@ func TestClientManager(t *testing.T) { masterCtx, masterCancel := context.WithCancel(ctx) defer masterCancel() - err = masterServer.Start(masterCtx) + err = masterServer.Run(masterCtx) require.Nil(t, err) err = manager.AddMasterClient(ctx, []string{"127.0.0.1:1992"}) diff --git a/cmd/master/main.go b/cmd/master/main.go index 8b945a362b8..a00a538db7b 100644 --- a/cmd/master/main.go +++ b/cmd/master/main.go @@ -9,8 +9,8 @@ import ( "syscall" "github.com/hanfei1991/microcosm/master" + "github.com/pingcap/errors" "github.com/pingcap/tiflow/dm/pkg/log" - "github.com/pkg/errors" "go.uber.org/zap" ) @@ -44,16 +44,10 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) server, err := master.NewServer(cfg, nil) if err != nil { - log.L().Error("fail to start dm-master", zap.Error(err)) - os.Exit(2) - } - err = server.Start(ctx) - if err != nil { - log.L().Error("fail to start dm-master", zap.Error(err)) + log.L().Error("fail to start dataflow master", zap.Error(err)) os.Exit(2) } - // 4. wait for stopping the process sc := make(chan os.Signal, 1) signal.Notify(sc, syscall.SIGHUP, @@ -61,9 +55,18 @@ func main() { syscall.SIGTERM, syscall.SIGQUIT) go func() { - sig := <-sc - log.L().Info("got signal to exit", zap.Stringer("signal", sig)) - cancel() + select { + case <-ctx.Done(): + case sig := <-sc: + log.L().Info("got signal to exit", zap.Stringer("signal", sig)) + cancel() + } }() - <-ctx.Done() + + err = server.Run(ctx) + if err != nil && errors.Cause(err) != context.Canceled { + log.L().Error("run dataflow master with error", zap.Error(err)) + os.Exit(2) + } + log.L().Info("server exits normally") } diff --git a/master/campaign.go b/master/campaign.go new file mode 100644 index 00000000000..be2d25ac9f5 --- /dev/null +++ b/master/campaign.go @@ -0,0 +1,58 @@ +package master + +import ( + "context" + "time" + + "github.com/hanfei1991/microcosm/pkg/errors" + "github.com/pingcap/tiflow/dm/pkg/log" + "go.etcd.io/etcd/mvcc" + "go.uber.org/zap" + "golang.org/x/time/rate" +) + +func (s *Server) campaignLeaderLoop(ctx context.Context) error { + rl := rate.NewLimiter(rate.Every(time.Millisecond*200), 1) + for { + err := rl.Wait(ctx) + if err != nil { + return err + } + err = s.reset(ctx) + if err != nil { + return err + } + err = s.campaign(ctx) + switch err { + case nil: + case context.Canceled: + return ctx.Err() + case mvcc.ErrCompacted: + continue + default: + log.L().Warn("campaign leader failed", zap.Error(err)) + return errors.Wrap(errors.ErrMasterCampaignLeader, err) + } + // TODO: if etcd leader is different with current server, resign current + // leader to keep them same + log.L().Info("campaign leader successfully", zap.String("server-id", s.name())) + cctx, cancel := context.WithCancel(ctx) + err = s.runLeaderService(cctx) + cancel() + return err + } +} + +func (s *Server) campaign(ctx context.Context) error { + err := s.election.Campaign(ctx, s.name()) + return errors.Wrap(errors.ErrMasterCampaignLeader, err) +} + +func (s *Server) resign() { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + err := s.election.Resign(ctx) + if err != nil { + log.L().Warn("resign leader failed", zap.Error(err)) + } +} diff --git a/master/member.go b/master/member.go new file mode 100644 index 00000000000..f21b5b6b220 --- /dev/null +++ b/master/member.go @@ -0,0 +1,65 @@ +package master + +import ( + "context" + "encoding/json" + + "github.com/hanfei1991/microcosm/pkg/adapter" + "github.com/hanfei1991/microcosm/pkg/errors" + "github.com/hanfei1991/microcosm/pkg/etcdutils" + "github.com/pingcap/tiflow/dm/pkg/log" + "go.etcd.io/etcd/clientv3/concurrency" + "go.uber.org/zap" +) + +type Member struct { + IsServLeader bool + IsEtcdLeader bool + Name string + Addrs []string +} + +func (s *Server) updateServerMasterMembers(ctx context.Context) error { + leader, err := etcdutils.GetLeaderID(ctx, s.etcdClient, adapter.MasterCampaignKey.Path()) + if err != nil { + if err == concurrency.ErrElectionNoLeader { + log.L().Warn("etcd election no leader") + } else { + return err + } + } + if leader != "" { + resp, err := s.etcdClient.Get(ctx, adapter.MasterInfoKey.Encode(leader)) + if err != nil { + return errors.Wrap(errors.ErrEtcdAPIError, err) + } + if resp.Count > 0 { + cfg := &Config{} + err = json.Unmarshal(resp.Kvs[0].Value, cfg) + if err != nil { + return err + } + leader = cfg.Etcd.Name + } + } + resp, err := s.etcdClient.MemberList(ctx) + if err != nil { + return err + } + members := make([]*Member, 0, len(resp.Members)) + for _, m := range resp.Members { + isServLeader := m.Name == leader + if isServLeader { + s.leaderName.Store(m.Name) + } + members = append(members, &Member{ + Name: m.Name, + Addrs: m.ClientURLs, + IsEtcdLeader: false, /* TODO */ + IsServLeader: isServLeader, + }) + } + s.members = members + log.L().Info("update members", zap.Any("members", members)) + return nil +} diff --git a/master/server.go b/master/server.go index 4b0b25ed53c..2fa912a2d76 100644 --- a/master/server.go +++ b/master/server.go @@ -4,9 +4,11 @@ import ( "context" "fmt" "net" + "time" "github.com/hanfei1991/microcosm/model" "github.com/hanfei1991/microcosm/pb" + "github.com/hanfei1991/microcosm/pkg/adapter" "github.com/hanfei1991/microcosm/pkg/errors" "github.com/hanfei1991/microcosm/pkg/etcdutils" "github.com/hanfei1991/microcosm/test" @@ -14,6 +16,7 @@ import ( "github.com/pingcap/tiflow/dm/pkg/etcdutil" "github.com/pingcap/tiflow/dm/pkg/log" "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/clientv3/concurrency" "go.etcd.io/etcd/embed" "go.uber.org/atomic" "go.uber.org/zap" @@ -25,7 +28,10 @@ type Server struct { etcd *embed.Etcd etcdClient *clientv3.Client - // election *election.Election + session *concurrency.Session + election *concurrency.Election + leaderName atomic.String + members []*Member // sched scheduler executorManager *ExecutorManager @@ -64,47 +70,35 @@ func NewServer(cfg *Config, ctx *test.Context) (*Server, error) { // Heartbeat implements pb interface. func (s *Server) Heartbeat(ctx context.Context, req *pb.HeartbeatRequest) (*pb.HeartbeatResponse, error) { - if !s.initialized.Load() { - return &pb.HeartbeatResponse{ - Err: &pb.Error{ - Code: pb.ErrorCode_MasterNotReady, - }, - }, nil + err := s.apiPreCheck() + if err != nil { + return &pb.HeartbeatResponse{Err: err}, nil } return s.executorManager.HandleHeartbeat(req) } // SubmitJob passes request onto "JobManager". func (s *Server) SubmitJob(ctx context.Context, req *pb.SubmitJobRequest) (*pb.SubmitJobResponse, error) { - if !s.initialized.Load() { - return &pb.SubmitJobResponse{ - Err: &pb.Error{ - Code: pb.ErrorCode_MasterNotReady, - }, - }, nil + err := s.apiPreCheck() + if err != nil { + return &pb.SubmitJobResponse{Err: err}, nil } return s.jobManager.SubmitJob(ctx, req), nil } func (s *Server) CancelJob(ctx context.Context, req *pb.CancelJobRequest) (*pb.CancelJobResponse, error) { - if !s.initialized.Load() { - return &pb.CancelJobResponse{ - Err: &pb.Error{ - Code: pb.ErrorCode_MasterNotReady, - }, - }, nil + err := s.apiPreCheck() + if err != nil { + return &pb.CancelJobResponse{Err: err}, nil } return s.jobManager.CancelJob(ctx, req), nil } // RegisterExecutor implements grpc interface, and passes request onto executor manager. func (s *Server) RegisterExecutor(ctx context.Context, req *pb.RegisterExecutorRequest) (*pb.RegisterExecutorResponse, error) { - if !s.initialized.Load() { - return &pb.RegisterExecutorResponse{ - Err: &pb.Error{ - Code: pb.ErrorCode_MasterNotReady, - }, - }, nil + ckErr := s.apiPreCheck() + if ckErr != nil { + return &pb.RegisterExecutorResponse{Err: ckErr}, nil } // register executor to scheduler // TODO: check leader, if not leader, return notLeader error. @@ -183,6 +177,7 @@ func (s *Server) startForTest(ctx context.Context) (err error) { if err != nil { return } + s.leaderName.Store(s.name()) s.initialized.Store(true) return } @@ -195,8 +190,8 @@ func (s *Server) Stop() { } } -// Start the master-server. -func (s *Server) Start(ctx context.Context) (err error) { +// Run the master-server. +func (s *Server) Run(ctx context.Context) (err error) { if test.GlobalTestFlag { return s.startForTest(ctx) } @@ -205,26 +200,10 @@ func (s *Server) Start(ctx context.Context) (err error) { if err != nil { return } - - // start leader election - // TODO: Consider election. And Notify workers when leader changes. - // s.election, err = election.NewElection(ctx, ) - - // rebuild states from existing meta if needed - err = s.resetExecutor(ctx) - if err != nil { - return err - } - - // start background managers - s.executorManager.Start(ctx) - err = s.jobManager.Start(ctx) - if err != nil { - return - } + go s.bgUpdateServerMembers(ctx) s.initialized.Store(true) - return + return s.campaignLeaderLoop(ctx) } func (s *Server) startGrpcSrv() (err error) { @@ -264,15 +243,98 @@ func (s *Server) startGrpcSrv() (err error) { if err != nil { return } - log.L().Logger.Info("start etcd successfully") // start grpc server - s.etcdClient, err = etcdutil.CreateClient([]string{withHost(s.cfg.MasterAddr)}, nil) return } +// name is a shortcut to etcd name +func (s *Server) name() string { + return s.cfg.Etcd.Name +} + +func (s *Server) reset(ctx context.Context) error { + sess, err := concurrency.NewSession( + s.etcdClient, concurrency.WithTTL(int(s.cfg.KeepAliveTTL.Seconds()))) + if err != nil { + return errors.Wrap(errors.ErrMasterNewServer, err) + } + _, err = s.etcdClient.Put(ctx, adapter.MasterInfoKey.Encode(s.name()), + s.cfg.String(), clientv3.WithLease(sess.Lease())) + if err != nil { + return errors.Wrap(errors.ErrEtcdAPIError, err) + } + + s.session = sess + s.election = concurrency.NewElection(sess, adapter.MasterCampaignKey.Path()) + return nil +} + +func (s *Server) runLeaderService(ctx context.Context) (err error) { + // rebuild states from existing meta if needed + err = s.resetExecutor(ctx) + if err != nil { + return + } + + // start background managers + s.executorManager.Start(ctx) + err = s.jobManager.Start(ctx) + if err != nil { + return + } + + s.leaderName.Store(s.name()) + defer func() { + s.leaderName.Store("") + s.resign() + }() + + select { + case <-ctx.Done(): + return ctx.Err() + case <-s.session.Done(): + return errors.ErrMasterSessionDone.GenWithStackByArgs() + } +} + +func (s *Server) apiPreCheck() *pb.Error { + if s.leaderName.Load() != s.name() { + return &pb.Error{ + Code: pb.ErrorCode_MasterNotLeader, + NotLeader: &pb.NotLeader{ + Request: s.cfg.AdvertiseAddr, + Leader: s.leaderName.Load(), + }, + } + } + if !s.initialized.Load() { + return &pb.Error{ + Code: pb.ErrorCode_MasterNotReady, + } + } + return nil +} + +func (s *Server) bgUpdateServerMembers(ctx context.Context) { + // TODO: refine background gourtine of server master, add exit notification + ticker := time.NewTicker(time.Second * 10) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + err := s.updateServerMasterMembers(ctx) + if err != nil { + log.L().Warn("update server members failed", zap.Error(err)) + } + } + } +} + func withHost(addr string) string { host, port, err := net.SplitHostPort(addr) if err != nil { diff --git a/pb/error.pb.go b/pb/error.pb.go index a08a5d74e5b..3f17ead98bc 100644 --- a/pb/error.pb.go +++ b/pb/error.pb.go @@ -84,16 +84,71 @@ func (ErrorCode) EnumDescriptor() ([]byte, []int) { return fileDescriptor_0579b252106fcf4a, []int{0} } +type NotLeader struct { + // The requested server address + Request string `protobuf:"bytes,1,opt,name=request,proto3" json:"request,omitempty"` + // leader address + Leader string `protobuf:"bytes,2,opt,name=leader,proto3" json:"leader,omitempty"` +} + +func (m *NotLeader) Reset() { *m = NotLeader{} } +func (m *NotLeader) String() string { return proto.CompactTextString(m) } +func (*NotLeader) ProtoMessage() {} +func (*NotLeader) Descriptor() ([]byte, []int) { + return fileDescriptor_0579b252106fcf4a, []int{0} +} +func (m *NotLeader) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *NotLeader) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_NotLeader.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *NotLeader) XXX_Merge(src proto.Message) { + xxx_messageInfo_NotLeader.Merge(m, src) +} +func (m *NotLeader) XXX_Size() int { + return m.Size() +} +func (m *NotLeader) XXX_DiscardUnknown() { + xxx_messageInfo_NotLeader.DiscardUnknown(m) +} + +var xxx_messageInfo_NotLeader proto.InternalMessageInfo + +func (m *NotLeader) GetRequest() string { + if m != nil { + return m.Request + } + return "" +} + +func (m *NotLeader) GetLeader() string { + if m != nil { + return m.Leader + } + return "" +} + type Error struct { - Code ErrorCode `protobuf:"varint,1,opt,name=code,proto3,enum=pb.ErrorCode" json:"code,omitempty"` - Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"` + Code ErrorCode `protobuf:"varint,1,opt,name=code,proto3,enum=pb.ErrorCode" json:"code,omitempty"` + Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"` + NotLeader *NotLeader `protobuf:"bytes,3,opt,name=not_leader,json=notLeader,proto3" json:"not_leader,omitempty"` } func (m *Error) Reset() { *m = Error{} } func (m *Error) String() string { return proto.CompactTextString(m) } func (*Error) ProtoMessage() {} func (*Error) Descriptor() ([]byte, []int) { - return fileDescriptor_0579b252106fcf4a, []int{0} + return fileDescriptor_0579b252106fcf4a, []int{1} } func (m *Error) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -136,35 +191,83 @@ func (m *Error) GetMessage() string { return "" } +func (m *Error) GetNotLeader() *NotLeader { + if m != nil { + return m.NotLeader + } + return nil +} + func init() { proto.RegisterEnum("pb.ErrorCode", ErrorCode_name, ErrorCode_value) + proto.RegisterType((*NotLeader)(nil), "pb.NotLeader") proto.RegisterType((*Error)(nil), "pb.Error") } func init() { proto.RegisterFile("error.proto", fileDescriptor_0579b252106fcf4a) } var fileDescriptor_0579b252106fcf4a = []byte{ - // 307 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x3c, 0x90, 0xcd, 0x4e, 0xc2, 0x40, - 0x10, 0xc7, 0xbb, 0xc8, 0xe7, 0xaa, 0xb8, 0x0c, 0x7e, 0xf4, 0xd4, 0xa0, 0x27, 0xe2, 0x81, 0x83, - 0xbe, 0x01, 0x88, 0x46, 0x23, 0x3d, 0x14, 0x7c, 0x80, 0x6e, 0x77, 0x82, 0x8d, 0xb0, 0xd3, 0x6c, - 0xb7, 0x2a, 0x6f, 0xa1, 0x6f, 0xe5, 0x91, 0xa3, 0x47, 0x03, 0xef, 0xe0, 0xd9, 0xb4, 0x02, 0xc7, - 0xf9, 0xfd, 0x7f, 0x99, 0xfc, 0x67, 0xf8, 0x3e, 0x1a, 0x43, 0xa6, 0x97, 0x18, 0xb2, 0x04, 0xa5, - 0x44, 0x5e, 0xdc, 0xf0, 0xca, 0x30, 0x47, 0x70, 0xce, 0xcb, 0x11, 0x29, 0x74, 0x59, 0x87, 0x75, - 0x9b, 0x57, 0x87, 0xbd, 0x44, 0xf6, 0x8a, 0x60, 0x40, 0x0a, 0x83, 0x22, 0x02, 0x97, 0xd7, 0xe6, - 0x98, 0xa6, 0xe1, 0x14, 0xdd, 0x52, 0x87, 0x75, 0x1b, 0xc1, 0x76, 0xbc, 0xfc, 0x65, 0xbc, 0xb1, - 0xb3, 0xa1, 0xce, 0xcb, 0x3e, 0x69, 0x14, 0x0e, 0xb4, 0xf9, 0xd1, 0x28, 0x4c, 0x2d, 0x1a, 0x9f, - 0xec, 0x23, 0x86, 0x0a, 0x8d, 0x60, 0x39, 0x7c, 0xd2, 0x2f, 0x9a, 0xde, 0xf4, 0xf0, 0x1d, 0xa3, - 0xcc, 0x92, 0x11, 0x25, 0x38, 0xe1, 0x2d, 0x9f, 0xec, 0x50, 0x53, 0x36, 0x7d, 0x0e, 0x30, 0xa5, - 0xcc, 0x44, 0x28, 0xf6, 0xe0, 0x94, 0xc3, 0x38, 0x93, 0x0f, 0x24, 0xc7, 0x99, 0x9c, 0xc7, 0xf6, - 0x36, 0x8c, 0x67, 0xa8, 0x44, 0x39, 0xd7, 0x27, 0x34, 0x97, 0xa9, 0x25, 0x8d, 0xbb, 0x2d, 0x95, - 0x1c, 0xff, 0xeb, 0xfd, 0x2c, 0x9e, 0xa9, 0x8d, 0x5d, 0x85, 0x33, 0xde, 0x2e, 0xc0, 0x9d, 0x49, - 0xa2, 0x01, 0x69, 0xbd, 0x09, 0x6a, 0xe0, 0xf2, 0xe3, 0x7b, 0xfd, 0x1a, 0xce, 0x62, 0x35, 0x42, - 0x1b, 0x8e, 0x2d, 0x19, 0x9c, 0x2c, 0x12, 0x14, 0x75, 0x00, 0xde, 0xdc, 0x35, 0x0f, 0x30, 0x54, - 0x0b, 0xd1, 0x80, 0x16, 0x3f, 0xd8, 0x16, 0xcf, 0x6f, 0x15, 0x9f, 0x7e, 0xdf, 0xfd, 0x5a, 0x79, - 0x6c, 0xb9, 0xf2, 0xd8, 0xcf, 0xca, 0x63, 0x1f, 0x6b, 0xcf, 0x59, 0xae, 0x3d, 0xe7, 0x7b, 0xed, - 0x39, 0xb2, 0x5a, 0xfc, 0xf8, 0xfa, 0x2f, 0x00, 0x00, 0xff, 0xff, 0x31, 0x9e, 0xa1, 0xe4, 0x72, - 0x01, 0x00, 0x00, + // 356 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x3c, 0x91, 0xbd, 0x8e, 0xda, 0x40, + 0x14, 0x85, 0x3d, 0xfc, 0xfb, 0x92, 0x90, 0x61, 0x48, 0x88, 0x2b, 0x8b, 0x50, 0xa1, 0x28, 0xa2, + 0x48, 0xea, 0x34, 0x20, 0x12, 0x25, 0x0a, 0x2e, 0x0c, 0xa9, 0x23, 0xff, 0x5c, 0x11, 0x6b, 0xcd, + 0x5c, 0xef, 0x78, 0xbc, 0xbb, 0xbc, 0xc5, 0xee, 0x5b, 0x6d, 0x49, 0xb9, 0xe5, 0x0a, 0xde, 0x61, + 0xeb, 0x95, 0x8d, 0xed, 0x6e, 0xee, 0x39, 0xe7, 0x7e, 0x73, 0x46, 0x03, 0x7d, 0x54, 0x8a, 0xd4, + 0x3c, 0x51, 0xa4, 0x49, 0x34, 0x12, 0x7f, 0xfa, 0x1d, 0x4c, 0x87, 0xf4, 0x1f, 0xf4, 0x42, 0x54, + 0xc2, 0x82, 0xae, 0xc2, 0xeb, 0x0c, 0x53, 0x6d, 0xb1, 0x09, 0x9b, 0x99, 0x6e, 0x35, 0x8a, 0x31, + 0x74, 0xe2, 0x22, 0x63, 0x35, 0x0a, 0xa3, 0x9c, 0xa6, 0x0a, 0xda, 0xab, 0x9c, 0x28, 0x3e, 0x41, + 0x2b, 0xa0, 0x10, 0x8b, 0xbd, 0xc1, 0xd7, 0xb7, 0xf3, 0xc4, 0x9f, 0x17, 0xc6, 0x92, 0x42, 0x74, + 0x0b, 0x2b, 0xa7, 0xef, 0x31, 0x4d, 0xbd, 0x1d, 0x96, 0x90, 0x6a, 0x14, 0x5f, 0x00, 0x24, 0xe9, + 0x7f, 0xe5, 0x0d, 0xcd, 0x09, 0x9b, 0xf5, 0x2f, 0x88, 0xba, 0x9a, 0x6b, 0xca, 0xea, 0xf8, 0xf9, + 0x85, 0x81, 0x59, 0xb3, 0x45, 0x0f, 0x5a, 0x0e, 0x49, 0xe4, 0x86, 0x18, 0xc1, 0xbb, 0xb5, 0x97, + 0x6a, 0x54, 0xf5, 0x16, 0x67, 0xb9, 0xf8, 0x57, 0x5e, 0x49, 0xba, 0x95, 0xab, 0x3b, 0x0c, 0x32, + 0x4d, 0x8a, 0x37, 0xc4, 0x07, 0x18, 0x3a, 0xa4, 0x57, 0x92, 0xb2, 0xdd, 0x7f, 0x17, 0x53, 0xca, + 0x54, 0x80, 0xbc, 0x29, 0xc6, 0x20, 0x36, 0x99, 0xff, 0x9b, 0xfc, 0x4d, 0xe6, 0xef, 0x23, 0xfd, + 0xc3, 0x8b, 0x62, 0x0c, 0x79, 0x2b, 0x8f, 0x6f, 0x69, 0xef, 0xa7, 0x9a, 0x24, 0xd6, 0x94, 0x76, + 0x2e, 0x5f, 0xe2, 0x8b, 0x2c, 0x8a, 0xc3, 0x32, 0xdd, 0x11, 0x1f, 0x61, 0x54, 0x08, 0x3f, 0x55, + 0x12, 0x2c, 0x49, 0xca, 0xd2, 0xe8, 0x0a, 0x0b, 0xde, 0xff, 0x92, 0x37, 0x5e, 0x1c, 0x85, 0x6b, + 0xd4, 0xde, 0x46, 0x93, 0xc2, 0xed, 0x21, 0x41, 0xde, 0x13, 0x02, 0x06, 0x75, 0x73, 0x17, 0xbd, + 0xf0, 0xc0, 0x4d, 0x31, 0x84, 0x37, 0x55, 0xf1, 0xfc, 0xad, 0xfc, 0xc1, 0x59, 0x58, 0x8f, 0x27, + 0x9b, 0x1d, 0x4f, 0x36, 0x7b, 0x3e, 0xd9, 0xec, 0xfe, 0x6c, 0x1b, 0xc7, 0xb3, 0x6d, 0x3c, 0x9d, + 0x6d, 0xc3, 0xef, 0x14, 0x1f, 0xfa, 0xed, 0x35, 0x00, 0x00, 0xff, 0xff, 0xd1, 0x44, 0x30, 0x2f, + 0xdf, 0x01, 0x00, 0x00, +} + +func (m *NotLeader) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *NotLeader) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *NotLeader) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Leader) > 0 { + i -= len(m.Leader) + copy(dAtA[i:], m.Leader) + i = encodeVarintError(dAtA, i, uint64(len(m.Leader))) + i-- + dAtA[i] = 0x12 + } + if len(m.Request) > 0 { + i -= len(m.Request) + copy(dAtA[i:], m.Request) + i = encodeVarintError(dAtA, i, uint64(len(m.Request))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil } func (m *Error) Marshal() (dAtA []byte, err error) { @@ -187,6 +290,18 @@ func (m *Error) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.NotLeader != nil { + { + size, err := m.NotLeader.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintError(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x1a + } if len(m.Message) > 0 { i -= len(m.Message) copy(dAtA[i:], m.Message) @@ -213,6 +328,23 @@ func encodeVarintError(dAtA []byte, offset int, v uint64) int { dAtA[offset] = uint8(v) return base } +func (m *NotLeader) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Request) + if l > 0 { + n += 1 + l + sovError(uint64(l)) + } + l = len(m.Leader) + if l > 0 { + n += 1 + l + sovError(uint64(l)) + } + return n +} + func (m *Error) Size() (n int) { if m == nil { return 0 @@ -226,6 +358,10 @@ func (m *Error) Size() (n int) { if l > 0 { n += 1 + l + sovError(uint64(l)) } + if m.NotLeader != nil { + l = m.NotLeader.Size() + n += 1 + l + sovError(uint64(l)) + } return n } @@ -235,6 +371,120 @@ func sovError(x uint64) (n int) { func sozError(x uint64) (n int) { return sovError(uint64((x << 1) ^ uint64((int64(x) >> 63)))) } +func (m *NotLeader) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowError + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: NotLeader: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: NotLeader: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Request", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowError + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthError + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthError + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Request = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Leader", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowError + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthError + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthError + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Leader = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipError(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthError + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func (m *Error) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -315,6 +565,42 @@ func (m *Error) Unmarshal(dAtA []byte) error { } m.Message = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field NotLeader", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowError + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthError + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthError + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.NotLeader == nil { + m.NotLeader = &NotLeader{} + } + if err := m.NotLeader.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipError(dAtA[iNdEx:]) diff --git a/pkg/adapter/keyadapter.go b/pkg/adapter/keyadapter.go index 919c59b6d93..f2479a00618 100644 --- a/pkg/adapter/keyadapter.go +++ b/pkg/adapter/keyadapter.go @@ -9,6 +9,8 @@ import ( ) var ( + MasterCampaignKey KeyAdapter = keyHexEncoderDecoder("/data-flow/master/leader") + MasterInfoKey KeyAdapter = keyHexEncoderDecoder("/data-flow/master/info") ExecutorInfoKeyAdapter KeyAdapter = keyHexEncoderDecoder("/data-flow/executor/info") JobKeyAdapter KeyAdapter = keyHexEncoderDecoder("/data-flow/job") ) diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index dcc5e089a38..d159c4939dc 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -31,6 +31,7 @@ var ( ErrDecodeEtcdKeyFail = errors.Normalize("failed to decode etcd key: %s", errors.RFCCodeText("DFLOW:ErrDecodeEtcdKeyFail")) ErrInvalidMetaStoreKey = errors.Normalize("invalid metastore key %s", errors.RFCCodeText("DFLOW:ErrInvalidMetaStoreKey")) ErrInvalidMetaStoreKeyTp = errors.Normalize("invalid metastore key type %s", errors.RFCCodeText("DFLOW:ErrInvalidMetaStoreKeyTp")) + ErrEtcdAPIError = errors.Normalize("etcd api returns error", errors.RFCCodeText("DFLOW:ErrEtcdAPIError")) // master related errors ErrMasterConfigParseFlagSet = errors.Normalize("parse config flag set failed", errors.RFCCodeText("DFLOW:ErrMasterConfigParseFlagSet")) @@ -42,6 +43,9 @@ var ( ErrMasterStartEmbedEtcdFail = errors.Normalize("failed to start embed etcd", errors.RFCCodeText("DFLOW:ErrMasterStartEmbedEtcdFail")) ErrMasterParseURLFail = errors.Normalize("failed to parse URL %s", errors.RFCCodeText("DFLOW:ErrMasterParseURLFail")) ErrMasterScheduleMissTask = errors.Normalize("task %d is not found after scheduling", errors.RFCCodeText("DFLOW:ErrMasterScheduleMissTask")) + ErrMasterNewServer = errors.Normalize("master create new server failed", errors.RFCCodeText("DFLOW:ErrMasterNewServer")) + ErrMasterCampaignLeader = errors.Normalize("master campaign to be leader failed", errors.RFCCodeText("DFLOW:ErrMasterCampaignLeader")) + ErrMasterSessionDone = errors.Normalize("master session is done", errors.RFCCodeText("DFLOW:ErrMasterSessionDone")) // executor related errors ErrExecutorConfigParseFlagSet = errors.Normalize("parse config flag set failed", errors.RFCCodeText("DFLOW:ErrExecutorConfigParseFlagSet")) diff --git a/pkg/etcdutils/service.go b/pkg/etcdutils/service.go index 347f6e78c06..5e528fdb6ce 100644 --- a/pkg/etcdutils/service.go +++ b/pkg/etcdutils/service.go @@ -1,10 +1,13 @@ package etcdutils import ( + "context" "net/http" "time" "github.com/hanfei1991/microcosm/pkg/errors" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/clientv3/concurrency" "go.etcd.io/etcd/embed" "google.golang.org/grpc" ) @@ -44,3 +47,14 @@ func StartEtcd(etcdCfg *embed.Config, } return e, nil } + +func GetLeaderID(ctx context.Context, cli *clientv3.Client, campKey string) (string, error) { + resp, err := cli.Get(ctx, campKey, clientv3.WithFirstCreate()...) + if err != nil { + return "", errors.Wrap(errors.ErrEtcdAPIError, err) + } + if len(resp.Kvs) == 0 { + return "", concurrency.ErrElectionNoLeader + } + return string(resp.Kvs[0].Value), nil +} diff --git a/proto/error.proto b/proto/error.proto index b12c0b56f7d..24b5e39b48d 100644 --- a/proto/error.proto +++ b/proto/error.proto @@ -25,7 +25,16 @@ enum ErrorCode { UnknownError = 10001; } +message NotLeader { + // The requested server address + string request = 1; + // leader address + string leader = 2; +} + message Error { ErrorCode code = 1; string message = 2; + + NotLeader not_leader = 3; } diff --git a/test/util_test.go b/test/util_test.go index 82c1653aade..fa3775c3db5 100644 --- a/test/util_test.go +++ b/test/util_test.go @@ -27,7 +27,7 @@ func (c *MiniCluster) CreateMaster(cfg *master.Config) (*test.Context, error) { func (c *MiniCluster) AsyncStartMaster() error { ctx := context.Background() masterCtx, masterCancel := context.WithCancel(ctx) - err := c.master.Start(masterCtx) + err := c.master.Run(masterCtx) c.masterCancel = masterCancel return err }