From 52772d8e150bd372aa40dc176a671648e9ed4a6a Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 3 Aug 2020 19:54:02 +0800 Subject: [PATCH] *: refine member/tso/adapter logs (#2707) Signed-off-by: Ryan Leung --- pkg/dashboard/adapter/manager.go | 5 +- pkg/errors/errors.go | 115 ------------------ pkg/errs/errs.go | 51 ++++++++ .../errors_test.go => errs/errs_test.go} | 17 +-- server/member/leader.go | 17 +-- server/tso/tso.go | 7 +- 6 files changed, 77 insertions(+), 135 deletions(-) delete mode 100644 pkg/errors/errors.go create mode 100644 pkg/errs/errs.go rename pkg/{errors/errors_test.go => errs/errs_test.go} (82%) diff --git a/pkg/dashboard/adapter/manager.go b/pkg/dashboard/adapter/manager.go index f469a1ae6f5..79c798ecfbe 100644 --- a/pkg/dashboard/adapter/manager.go +++ b/pkg/dashboard/adapter/manager.go @@ -24,6 +24,7 @@ import ( "go.uber.org/zap" "github.com/pingcap/log" + "github.com/pingcap/pd/v4/pkg/errs" "github.com/pingcap/pd/v4/pkg/logutil" "github.com/pingcap/pd/v4/server" "github.com/pingcap/pd/v4/server/cluster" @@ -195,7 +196,7 @@ func (m *Manager) startService() { return } if err := m.service.Start(m.ctx); err != nil { - log.Error("Can not start dashboard server", zap.Error(err)) + log.Error("Can not start dashboard server", zap.Error(errs.ErrStartDashboard.FastGenByArgs()), zap.NamedError("cause", err)) } else { log.Info("Dashboard server is started") } @@ -206,7 +207,7 @@ func (m *Manager) stopService() { return } if err := m.service.Stop(context.Background()); err != nil { - log.Error("Stop dashboard server error", zap.Error(err)) + log.Error("Stop dashboard server error", zap.Error(errs.ErrStopDashboard.FastGenByArgs()), zap.NamedError("cause", err)) } else { log.Info("Dashboard server is stopped") } diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go deleted file mode 100644 index 9e0c42235f9..00000000000 --- a/pkg/errors/errors.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package errors - -import "github.com/pingcap/errors" - -var ( - reg = errors.NewRegistry("PD") - // ClassIO is IO error class - ClassIO = reg.RegisterErrorClass(1, "io") - // ClassNetwork is network error class - ClassNetwork = reg.RegisterErrorClass(2, "network") - // ClassStorage is storage error class - ClassStorage = reg.RegisterErrorClass(3, "storage") - // ClassInternal is internal error class - ClassInternal = reg.RegisterErrorClass(4, "internal") - // ClassFormat is format error class - ClassFormat = reg.RegisterErrorClass(5, "format") - // ClassOther is other error class - ClassOther = reg.RegisterErrorClass(6, "other") -) - -var ( - // ErrIORead is io error - ErrIORead = ClassIO.DefineError().TextualCode("ErrIORead").MessageTemplate("io read error").Build() - // ErrHTTPErrorResponse is http error response error - ErrHTTPErrorResponse = ClassNetwork.DefineError().TextualCode("ErrHTTPErrorResponse").MessageTemplate("make http error response fail").Build() - // ErrHTTPRedirect is http redirect error - ErrHTTPRedirect = ClassNetwork.DefineError().TextualCode("ErrHTTPRedirect").MessageTemplate("http redirect many times").Build() - // ErrHTTPRequestURL is http request url error - ErrHTTPRequestURL = ClassNetwork.DefineError().TextualCode("ErrHTTPRequestURL").MessageTemplate("wrong url in user http request").Build() - // ErrGRPCTso is grpc tso error - ErrGRPCTso = ClassNetwork.DefineError().TextualCode("ErrGRPCTso").MessageTemplate("grpc tso request fail").Build() - // ErrGRPCHeartbeat is grpc heartbeat error - ErrGRPCHeartbeat = ClassNetwork.DefineError().TextualCode("ErrGRPCHeartbeat").MessageTemplate("grpc heartbeat request fail").Build() - // ErrGRPCClose is grpc close error - ErrGRPCClose = ClassNetwork.DefineError().TextualCode("ErrGRPCClose").MessageTemplate("grpc close connection fail").Build() - // ErrGRPCSend is grpc send error - ErrGRPCSend = ClassNetwork.DefineError().TextualCode("ErrGRPCSend").MessageTemplate("grpc send message fail").Build() - // ErrStorageLoad is storage load error - ErrStorageLoad = ClassStorage.DefineError().TextualCode("ErrStorageLoad").MessageTemplate("load config from storage error").Build() - // ErrStorageSave is storage save error - ErrStorageSave = ClassStorage.DefineError().TextualCode("ErrStorageSave").MessageTemplate("save config to storage error").Build() - // ErrStorageDelete is storage delete error - ErrStorageDelete = ClassStorage.DefineError().TextualCode("ErrStorageDelete").MessageTemplate("delete config from storage error").Build() - // ErrStorageEtcdLoad is etcd storage load error - ErrStorageEtcdLoad = ClassStorage.DefineError().TextualCode("ErrStorageEtcdLoad").MessageTemplate("load config from storage etcd error").Build() - // ErrStorageEtcdSave is etcd storage save error - ErrStorageEtcdSave = ClassStorage.DefineError().TextualCode("ErrStorageEtcdSave").MessageTemplate("save config to storage etcd error").Build() - // ErrStorageEtcdDelete is etcd storage delete error - ErrStorageEtcdDelete = ClassStorage.DefineError().TextualCode("ErrStorageEtcdDelete").MessageTemplate("delete config from storage etcd error").Build() - // ErrInternalSchedulerDuplicate is scheduler duplicate - ErrInternalSchedulerDuplicate = ClassInternal.DefineError().TextualCode("ErrInternalSchedulerDuplicate").MessageTemplate("duplicate scheduler found").Build() - // ErrInternalSchedulerNotFound is scheduler not found - ErrInternalSchedulerNotFound = ClassInternal.DefineError().TextualCode("ErrInternalSchedulerNotFound").MessageTemplate("scheduler not found").Build() - // ErrInternalSchedulerConfig is scheduler config error - ErrInternalSchedulerConfig = ClassInternal.DefineError().TextualCode("ErrInternalSchedulerConfig").MessageTemplate("wrong scheduler config").Build() - // ErrInternalRuleInvalid is invalid rule - ErrInternalRuleInvalid = ClassInternal.DefineError().TextualCode("ErrInternalRuleInvalid").MessageTemplate("invalid rule found").Build() - // ErrInternalRuleDuplicate is duplicate rule - ErrInternalRuleDuplicate = ClassInternal.DefineError().TextualCode("ErrInternalRuleDuplicate").MessageTemplate("duplicate rule found").Build() - // ErrInternalRuleMismatch is rule mismatch - ErrInternalRuleMismatch = ClassInternal.DefineError().TextualCode("ErrInternalRuleMismatch").MessageTemplate("rule key mismatch").Build() - // ErrInternalOperatorNotFound is operator not found - ErrInternalOperatorNotFound = ClassInternal.DefineError().TextualCode("ErrInternalOperatorNotFound").MessageTemplate("operator not found").Build() - // ErrInternalOperatorMerge is operator merge error - ErrInternalOperatorMerge = ClassInternal.DefineError().TextualCode("ErrInternalOperatorMerge").MessageTemplate("merge operator should be pair").Build() - // ErrInternalOperatorNotStart is operator not start - ErrInternalOperatorNotStart = ClassInternal.DefineError().TextualCode("ErrInternalOperatorNotStart").MessageTemplate("operator not start").Build() - // ErrInternalOperatorNotEnd is operator not end - ErrInternalOperatorNotEnd = ClassInternal.DefineError().TextualCode("ErrInternalOperatorNotEnd").MessageTemplate("operator not end").Build() - // ErrInternalOperatorStepUnknown is operator step unknown - ErrInternalOperatorStepUnknown = ClassInternal.DefineError().TextualCode("ErrInternalStepUnknown").MessageTemplate("operator step is unknown").Build() - // ErrInternalStoreNotFound is store not found - ErrInternalStoreNotFound = ClassInternal.DefineError().TextualCode("ErrInternalStoreNotFound").MessageTemplate("store id %d not found").Build() - // ErrInternalClusterVersionChange is cluster version change error - ErrInternalClusterVersionChange = ClassInternal.DefineError().TextualCode("ErrInternalClusterVersionChange").MessageTemplate("cluster version change same time").Build() - // ErrInternalRegionKey is region key error - ErrInternalRegionKey = ClassInternal.DefineError().TextualCode("ErrInternalRegionKey").MessageTemplate("wrong region key range").Build() - // ErrInternalCacheRegionOverflow is cache region overflow - ErrInternalCacheRegionOverflow = ClassInternal.DefineError().TextualCode("ErrInternalCacheRegionOverflow").MessageTemplate("cache region overflow").Build() - // ErrInternalVersionFeatureNotExist is version feature not exist - ErrInternalVersionFeatureNotExist = ClassInternal.DefineError().TextualCode("ErrInternalVersionFeatureNotExist").MessageTemplate("version feature not exist").Build() - // ErrFormatParseCmd is parse cmd error - ErrFormatParseCmd = ClassFormat.DefineError().TextualCode("ErrFormatParseCmd").MessageTemplate("parse cmd error").Build() - // ErrFormatParseClusterVersion is parse cluster version error - ErrFormatParseClusterVersion = ClassFormat.DefineError().TextualCode("ErrFormatParseClusterVersion").MessageTemplate("parse cluster version error").Build() - // ErrFormatParseURL is parse url error - ErrFormatParseURL = ClassFormat.DefineError().TextualCode("ErrFormatParseURL").MessageTemplate("parse url error").Build() - // ErrFormatParseHistoryIndex is parse history index error - ErrFormatParseHistoryIndex = ClassFormat.DefineError().TextualCode("ErrFormatParseHistoryIndex").MessageTemplate("parse history index error").Build() - // ErrOtherInitLog is init log error - ErrOtherInitLog = ClassOther.DefineError().TextualCode("ErrOtherInitLog").MessageTemplate("init log fail").Build() - // ErrOtherDashboardServer is dashboard server error - ErrOtherDashboardServer = ClassOther.DefineError().TextualCode("ErrOtherDashboardServer").MessageTemplate("dashboard server error").Build() - // ErrOtherPrometheusPush is prometheus push error - ErrOtherPrometheusPush = ClassOther.DefineError().TextualCode("ErrOtherPrometheusPush").MessageTemplate("push to prometheus error").Build() - // ErrOtherPluginLoadActionUnknown is plugin action unknown - ErrOtherPluginLoadActionUnknown = ClassOther.DefineError().TextualCode("ErrOtherPluginLoadActionUnknown").MessageTemplate("unknown action to load plugin").Build() - // ErrOtherPluginFuncNotFound is plugin func not found - ErrOtherPluginFuncNotFound = ClassOther.DefineError().TextualCode("ErrOtherPluginFuncNotFound").MessageTemplate("plugin function not found").Build() - // ErrOtherSystemTime is system time error - ErrOtherSystemTime = ClassOther.DefineError().TextualCode("ErrOtherSystemTime").MessageTemplate("system time error").Build() -) diff --git a/pkg/errs/errs.go b/pkg/errs/errs.go new file mode 100644 index 00000000000..06b9d7e4f45 --- /dev/null +++ b/pkg/errs/errs.go @@ -0,0 +1,51 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package errs + +import "github.com/pingcap/errors" + +var ( + reg = errors.NewRegistry("PD") + // ClassTSO defines tso error class + ClassTSO = reg.RegisterErrorClass(1, "tso") + // ClassAdaptor defines adapter error class + ClassAdaptor = reg.RegisterErrorClass(2, "adapter") + // ClassMember defines member error class + ClassMember = reg.RegisterErrorClass(3, "member") +) + +// tso errors +var ( + ErrInvalidTimestamp = ClassTSO.DefineError().TextualCode("ErrInvalidTimestamp").MessageTemplate("invalid timestamp").Build() + ErrLogicOverflow = ClassTSO.DefineError().TextualCode("ErrLogicOverflow").MessageTemplate("logic part overflow").Build() + ErrIncorrectSystemTime = ClassTSO.DefineError().TextualCode("ErrIncorrectSystemTime").MessageTemplate("incorrect system time").Build() +) + +// adapter errors +var ( + ErrStartDashboard = ClassAdaptor.DefineError().TextualCode("ErrStartDashboard").MessageTemplate("fail to start dashboard").Build() + ErrStopDashboard = ClassAdaptor.DefineError().TextualCode("ErrStopDashboard").MessageTemplate("fail to stop dashboard").Build() +) + +// member errors +var ( + ErretcdLeaderNotFound = ClassMember.DefineError().TextualCode("ErretcdLeaderNotFound").MessageTemplate("etcd leader not found").Build() + ErrGetLeader = ClassMember.DefineError().TextualCode("ErrGetLeader").MessageTemplate("fail to get leader").Build() + ErrDeleteLeaderKey = ClassMember.DefineError().TextualCode("ErrDeleteLeaderKey").MessageTemplate("fail to delete leader key").Build() + ErrLoadLeaderPriority = ClassMember.DefineError().TextualCode("ErrLoadLeaderPriority").MessageTemplate("fail to load leader priority").Build() + ErrLoadetcdLeaderPriority = ClassMember.DefineError().TextualCode("ErrLoadetcdLeaderPriority").MessageTemplate("fail to load etcd leader priority").Build() + ErrTransferetcdLeader = ClassMember.DefineError().TextualCode("ErrTransferetcdLeader").MessageTemplate("fail to transfer etcd leader").Build() + ErrWatcherCancel = ClassMember.DefineError().TextualCode("ErrWatcherCancel").MessageTemplate("watcher canceled").Build() + ErrMarshalLeader = ClassMember.DefineError().TextualCode("ErrMarshalLeader").MessageTemplate("fail to marshal leader").Build() +) diff --git a/pkg/errors/errors_test.go b/pkg/errs/errs_test.go similarity index 82% rename from pkg/errors/errors_test.go rename to pkg/errs/errs_test.go index 23fbe7f5738..e4e0b120379 100644 --- a/pkg/errors/errors_test.go +++ b/pkg/errs/errs_test.go @@ -11,15 +11,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -package errors +package errs import ( "bytes" + "errors" "fmt" - "github.com/pingcap/log" - "go.uber.org/zap" "strings" "testing" + + "github.com/pingcap/log" + "go.uber.org/zap" ) // testingWriter is a WriteSyncer that writes to the the messages. @@ -79,10 +81,11 @@ func TestError(t *testing.T) { lg := newZapTestLogger(conf) log.ReplaceGlobals(lg.Logger, nil) - rfc := `[error="[PD:format:ErrFormatParseHistoryIndex] parse history index error"]` - log.Error(ErrFormatParseHistoryIndex.MessageTemplate(), zap.Error(ErrFormatParseHistoryIndex.FastGenByArgs())) + rfc := `[error="[PD:tso:ErrInvalidTimestamp] invalid timestamp"]` + log.Error("test", zap.Error(ErrInvalidTimestamp.FastGenByArgs())) lg.Contain(t, rfc) - rfc = `[error="[PD:internal:ErrInternalStoreNotFound] store id 1 not found"]` - log.Error(ErrInternalStoreNotFound.MessageTemplate(), zap.Error(ErrInternalStoreNotFound.FastGenByArgs(1))) + cause := `[cause="test err"]` + log.Error("test", zap.Error(ErrInvalidTimestamp.FastGenByArgs()), zap.NamedError("cause", errors.New("test err"))) lg.Contain(t, rfc) + lg.Contain(t, cause) } diff --git a/server/member/leader.go b/server/member/leader.go index 47754133ddd..11b9bf5b92d 100644 --- a/server/member/leader.go +++ b/server/member/leader.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/pingcap/pd/v4/pkg/errs" "github.com/pingcap/pd/v4/pkg/etcdutil" "github.com/pingcap/pd/v4/server/config" "github.com/pingcap/pd/v4/server/kv" @@ -131,14 +132,14 @@ func (m *Member) GetLeaderPath() string { // CheckLeader checks returns true if it is needed to check later. func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { if m.GetEtcdLeader() == 0 { - log.Error("no etcd leader, check leader later") + log.Error("no etcd leader, check leader later", zap.Error(errs.ErretcdLeaderNotFound.FastGenByArgs())) time.Sleep(200 * time.Millisecond) return nil, 0, true } leader, rev, err := getLeader(m.client, m.GetLeaderPath()) if err != nil { - log.Error("get leader meet error", zap.Error(err)) + log.Error("get leader meet error", zap.Error(errs.ErrGetLeader.FastGenByArgs()), zap.NamedError("cause", err)) time.Sleep(200 * time.Millisecond) return nil, 0, true } @@ -148,7 +149,7 @@ func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { // in previous CampaignLeader. we can delete and campaign again. log.Warn("the leader has not changed, delete and campaign again", zap.Stringer("old-leader", leader)) if err = m.deleteLeaderKey(); err != nil { - log.Error("delete leader key meet error", zap.Error(err)) + log.Error("delete leader key meet error", zap.Error(errs.ErrDeleteLeaderKey.FastGenByArgs()), zap.NamedError("cause", err)) time.Sleep(200 * time.Millisecond) return nil, 0, true } @@ -165,18 +166,18 @@ func (m *Member) CheckPriority(ctx context.Context) { } myPriority, err := m.GetMemberLeaderPriority(m.ID()) if err != nil { - log.Error("failed to load leader priority", zap.Error(err)) + log.Error("failed to load leader priority", zap.Error(errs.ErrLoadLeaderPriority.FastGenByArgs()), zap.NamedError("cause", err)) return } leaderPriority, err := m.GetMemberLeaderPriority(etcdLeader) if err != nil { - log.Error("failed to load etcd leader priority", zap.Error(err)) + log.Error("failed to load etcd leader priority", zap.Error(errs.ErrLoadetcdLeaderPriority.FastGenByArgs()), zap.NamedError("cause", err)) return } if myPriority > leaderPriority { err := m.MoveEtcdLeader(ctx, etcdLeader, m.ID()) if err != nil { - log.Error("failed to transfer etcd leader", zap.Error(err)) + log.Error("failed to transfer etcd leader", zap.Error(errs.ErrTransferetcdLeader.FastGenByArgs()), zap.NamedError("cause", err)) } else { log.Info("transfer etcd leader", zap.Uint64("from", etcdLeader), @@ -227,7 +228,7 @@ func (m *Member) MemberInfo(cfg *config.Config, name string, rootPath string) { data, err := leader.Marshal() if err != nil { // can't fail, so panic here. - log.Fatal("marshal leader meet error", zap.Stringer("leader", leader), zap.Error(err)) + log.Fatal("marshal leader meet error", zap.Stringer("leader", leader), zap.Error(errs.ErrMarshalLeader.FastGenByArgs()), zap.NamedError("cause", err)) } m.member = leader m.memberValue = string(data) @@ -475,7 +476,7 @@ func (m *Member) WatchLeader(serverCtx context.Context, leader *pdpb.Member, rev break } if wresp.Canceled { - log.Error("leader watcher is canceled with", zap.Int64("revision", revision), zap.Error(wresp.Err())) + log.Error("leader watcher is canceled with", zap.Int64("revision", revision), zap.Error(errs.ErrWatcherCancel.FastGenByArgs()), zap.NamedError("cause", wresp.Err())) return } diff --git a/server/tso/tso.go b/server/tso/tso.go index bf2ca3f565f..f5f57af087e 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/pingcap/pd/v4/pkg/errs" "github.com/pingcap/pd/v4/pkg/etcdutil" "github.com/pingcap/pd/v4/pkg/tsoutil" "github.com/pingcap/pd/v4/pkg/typeutil" @@ -144,7 +145,7 @@ func (t *TimestampOracle) SyncTimestamp(lease *member.LeaderLease) error { // If the current system time minus the saved etcd timestamp is less than `updateTimestampGuard`, // the timestamp allocation will start from the saved etcd timestamp temporarily. if typeutil.SubTimeByWallClock(next, last) < updateTimestampGuard { - log.Error("system time may be incorrect", zap.Time("last", last), zap.Time("next", next)) + log.Error("system time may be incorrect", zap.Time("last", last), zap.Time("next", next), zap.Error(errs.ErrIncorrectSystemTime.FastGenByArgs())) next = last.Add(updateTimestampGuard) } @@ -299,7 +300,7 @@ func (t *TimestampOracle) GetRespTS(count uint32) (pdpb.Timestamp, error) { time.Sleep(200 * time.Millisecond) continue } - log.Error("invalid timestamp", zap.Any("timestamp", current)) + log.Error("invalid timestamp", zap.Any("timestamp", current), zap.Error(errs.ErrInvalidTimestamp.FastGenByArgs())) return pdpb.Timestamp{}, errors.New("can not get timestamp, may be not leader") } @@ -308,7 +309,7 @@ func (t *TimestampOracle) GetRespTS(count uint32) (pdpb.Timestamp, error) { if resp.Logical >= maxLogical { log.Error("logical part outside of max logical interval, please check ntp time", zap.Reflect("response", resp), - zap.Int("retry-count", i)) + zap.Int("retry-count", i), zap.Error(errs.ErrLogicOverflow.FastGenByArgs())) tsoCounter.WithLabelValues("logical_overflow").Inc() time.Sleep(UpdateTimestampStep) continue