Skip to content

Commit

Permalink
Refine the log errs in scheduler (#2705) (#2900)
Browse files Browse the repository at this point in the history
Signed-off-by: ti-srebot <ti-srebot@pingcap.com>
Signed-off-by: lhy1024 <admin@liudos.us>
  • Loading branch information
ti-srebot authored Sep 4, 2020
1 parent 463e0f1 commit 30e7167
Show file tree
Hide file tree
Showing 20 changed files with 163 additions and 105 deletions.
21 changes: 21 additions & 0 deletions pkg/errs/errno.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,24 @@ var (
ErrEtcdKVSave = errors.Normalize("etcd KV save failed", errors.RFCCodeText("PD:kv:ErrEtcdKVSave"))
ErrEtcdKVRemove = errors.Normalize("etcd KV remove failed", errors.RFCCodeText("PD:kv:ErrEtcdKVRemove"))
)

// scheduler errors
var (
ErrGetSourceStore = errors.Normalize("failed to get the source store", errors.RFCCodeText("PD:scheduler:ErrGetSourceStore"))
ErrSchedulerExisted = errors.Normalize("scheduler existed", errors.RFCCodeText("PD:scheduler:ErrSchedulerExisted"))
ErrSchedulerNotFound = errors.Normalize("scheduler not found", errors.RFCCodeText("PD:scheduler:ErrSchedulerNotFound"))
ErrScheduleConfigNotExist = errors.Normalize("the config does not exist", errors.RFCCodeText("PD:scheduler:ErrScheduleConfigNotExist"))
ErrSchedulerConfig = errors.Normalize("wrong scheduler config %s", errors.RFCCodeText("PD:scheduler:ErrSchedulerConfig"))
ErrCacheOverflow = errors.Normalize("cache overflow", errors.RFCCodeText("PD:scheduler:ErrCacheOverflow"))
ErrInternalGrowth = errors.Normalize("unknown interval growth type error", errors.RFCCodeText("PD:scheduler:ErrInternalGrowth"))
)

// strconv errors
var (
ErrStrconvParseUint = errors.Normalize("parse uint error", errors.RFCCodeText("PD:strconv:ErrStrconvParseUint"))
)

// url errors
var (
ErrQueryUnescape = errors.Normalize("inverse transformation of QueryEscape error", errors.RFCCodeText("PD:url:ErrQueryUnescape"))
)
12 changes: 9 additions & 3 deletions pkg/errs/errs.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ import (
)

// ZapError is used to make the log output eaiser.
func ZapError(err *errors.Error, causeError error) zap.Field {
e := err.Wrap(causeError).FastGenWithCause()
return zap.Field{Key: "error", Type: zapcore.ErrorType, Interface: e}
func ZapError(err error, causeError ...error) zap.Field {
if e, ok := err.(*errors.Error); ok {
if len(causeError) >= 1 {
err = e.Wrap(causeError[0]).FastGenWithCause()
} else {
err = e.FastGenByArgs()
}
}
return zap.Field{Key: "error", Type: zapcore.ErrorType, Interface: err}
}
35 changes: 35 additions & 0 deletions pkg/errs/errs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,38 @@ func (s *testErrorSuite) TestError(c *C) {
fmt.Println(lg.Message())
c.Assert(strings.Contains(lg.Message(), rfc), IsTrue)
}

func (s *testErrorSuite) TestErrorEqual(c *C) {
err1 := ErrSchedulerNotFound.FastGenByArgs()
err2 := ErrSchedulerNotFound.FastGenByArgs()
c.Assert(errors.ErrorEqual(err1, err2), IsTrue)

err := errors.New("test")
err1 = ErrSchedulerNotFound.Wrap(err).FastGenWithCause()
err2 = ErrSchedulerNotFound.Wrap(err).FastGenWithCause()
c.Assert(errors.ErrorEqual(err1, err2), IsTrue)

err1 = ErrSchedulerNotFound.FastGenByArgs()
err2 = ErrSchedulerNotFound.Wrap(err).FastGenWithCause()
c.Assert(errors.ErrorEqual(err1, err2), IsFalse)

err3 := errors.New("test")
err4 := errors.New("test")
err1 = ErrSchedulerNotFound.Wrap(err3).FastGenWithCause()
err2 = ErrSchedulerNotFound.Wrap(err4).FastGenWithCause()
c.Assert(errors.ErrorEqual(err1, err2), IsTrue)

err3 = errors.New("test1")
err4 = errors.New("test")
err1 = ErrSchedulerNotFound.Wrap(err3).FastGenWithCause()
err2 = ErrSchedulerNotFound.Wrap(err4).FastGenWithCause()
c.Assert(errors.ErrorEqual(err1, err2), IsFalse)
}

func (s *testErrorSuite) TestZapError(c *C) {
err := errors.New("test")
log.Info("test", ZapError(err))
err1 := ErrSchedulerNotFound
log.Info("test", ZapError(err1))
log.Info("test", ZapError(err1, err))
}
14 changes: 8 additions & 6 deletions server/api/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (
"strings"

"github.com/gorilla/mux"
"github.com/pingcap/errors"
"github.com/tikv/pd/pkg/apiutil"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server"
"github.com/tikv/pd/server/schedulers"
"github.com/unrolled/render"
Expand Down Expand Up @@ -147,13 +149,13 @@ func (h *schedulerHandler) Post(w http.ResponseWriter, r *http.Request) {
return
}
err := h.AddGrantLeaderScheduler(uint64(storeID))
if err == schedulers.ErrSchedulerExisted {
if errors.ErrorEqual(err, errs.ErrSchedulerExisted.FastGenByArgs()) {
if err := h.redirectSchedulerUpdate(schedulers.GrantLeaderName, storeID); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}
}
if err != nil && err != schedulers.ErrSchedulerExisted {
if err != nil && !errors.ErrorEqual(err, errs.ErrSchedulerExisted.FastGenByArgs()) {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}
Expand All @@ -164,13 +166,13 @@ func (h *schedulerHandler) Post(w http.ResponseWriter, r *http.Request) {
return
}
err := h.AddEvictLeaderScheduler(uint64(storeID))
if err == schedulers.ErrSchedulerExisted {
if errors.ErrorEqual(err, errs.ErrSchedulerExisted.FastGenByArgs()) {
if err := h.redirectSchedulerUpdate(schedulers.EvictLeaderName, storeID); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}
}
if err != nil && err != schedulers.ErrSchedulerExisted {
if err != nil && !errors.ErrorEqual(err, errs.ErrSchedulerExisted.FastGenByArgs()) {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}
Expand Down Expand Up @@ -250,7 +252,7 @@ func (h *schedulerHandler) Delete(w http.ResponseWriter, r *http.Request) {
}

func (h *schedulerHandler) handleErr(w http.ResponseWriter, err error) {
if err == schedulers.ErrSchedulerNotFound {
if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) {
h.r.JSON(w, http.StatusNotFound, err.Error())
} else {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
Expand All @@ -263,7 +265,7 @@ func (h *schedulerHandler) redirectSchedulerDelete(name, schedulerName string) e
url := fmt.Sprintf("%s/%s/%s/delete/%s", h.GetAddr(), schedulerConfigPrefix, schedulerName, args[0])
resp, err := doDelete(h.svr.GetHTTPClient(), url)
if resp.StatusCode != 200 {
return schedulers.ErrSchedulerNotFound
return errs.ErrSchedulerNotFound.FastGenByArgs()
}
if err != nil {
return err
Expand Down
4 changes: 2 additions & 2 deletions server/cluster/cluster_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ import (
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/log"
"github.com/pkg/errors"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server/core"
"github.com/tikv/pd/server/schedule"
"github.com/tikv/pd/server/schedulers"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -112,7 +112,7 @@ func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (*
for i := 0; i < int(splitCount); i++ {
newRegionID, err := c.id.Alloc()
if err != nil {
return nil, schedulers.ErrSchedulerNotFound
return nil, errs.ErrSchedulerNotFound.FastGenByArgs()
}

peerIDs := make([]uint64, len(request.Region.Peers))
Expand Down
13 changes: 7 additions & 6 deletions server/cluster/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ import (
"sync/atomic"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/log"
"github.com/pkg/errors"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/pkg/keyutil"
"github.com/tikv/pd/pkg/logutil"
"github.com/tikv/pd/server/config"
Expand Down Expand Up @@ -293,7 +294,7 @@ func (c *coordinator) run() {
}

log.Info("create scheduler", zap.String("scheduler-name", s.GetName()))
if err = c.addScheduler(s, schedulerCfg.Args...); err != nil && err != schedulers.ErrSchedulerExisted {
if err = c.addScheduler(s, schedulerCfg.Args...); err != nil && !errors.ErrorEqual(err, errs.ErrSchedulerExisted.FastGenByArgs()) {
log.Error("can not add scheduler", zap.String("scheduler-name", s.GetName()), zap.Error(err))
} else {
// Only records the valid scheduler config.
Expand Down Expand Up @@ -533,7 +534,7 @@ func (c *coordinator) addScheduler(scheduler schedule.Scheduler, args ...string)
defer c.Unlock()

if _, ok := c.schedulers[scheduler.GetName()]; ok {
return schedulers.ErrSchedulerExisted
return errs.ErrSchedulerExisted.FastGenByArgs()
}

s := newScheduleController(c, scheduler)
Expand All @@ -556,7 +557,7 @@ func (c *coordinator) removeScheduler(name string) error {
}
s, ok := c.schedulers[name]
if !ok {
return schedulers.ErrSchedulerNotFound
return errs.ErrSchedulerNotFound.FastGenByArgs()
}

s.Stop()
Expand Down Expand Up @@ -588,7 +589,7 @@ func (c *coordinator) pauseOrResumeScheduler(name string, t int64) error {
if name != "all" {
sc, ok := c.schedulers[name]
if !ok {
return schedulers.ErrSchedulerNotFound
return errs.ErrSchedulerNotFound.FastGenByArgs()
}
s = append(s, sc)
} else {
Expand All @@ -615,7 +616,7 @@ func (c *coordinator) isSchedulerPaused(name string) (bool, error) {
}
s, ok := c.schedulers[name]
if !ok {
return false, schedulers.ErrSchedulerNotFound
return false, errs.ErrSchedulerNotFound.FastGenByArgs()
}
return s.IsPaused(), nil
}
Expand Down
20 changes: 10 additions & 10 deletions server/schedulers/adjacent_region.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (

"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/log"
"github.com/pkg/errors"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server/core"
"github.com/tikv/pd/server/schedule"
"github.com/tikv/pd/server/schedule/filter"
Expand All @@ -47,16 +47,16 @@ func init() {
return func(v interface{}) error {
conf, ok := v.(*balanceAdjacentRegionConfig)
if !ok {
return ErrScheduleConfigNotExist
return errs.ErrScheduleConfigNotExist.FastGenByArgs()
}
if len(args) == 2 {
leaderLimit, err := strconv.ParseUint(args[0], 10, 64)
if err != nil {
return errors.WithStack(err)
return errs.ErrStrconvParseUint.Wrap(err).FastGenWithCause()
}
peerLimit, err := strconv.ParseUint(args[1], 10, 64)
if err != nil {
return errors.WithStack(err)
return errs.ErrStrconvParseUint.Wrap(err).FastGenWithCause()
}
conf.LeaderLimit = leaderLimit
conf.PeerLimit = peerLimit
Expand Down Expand Up @@ -233,7 +233,7 @@ func (l *balanceAdjacentRegionScheduler) process(cluster opt.Cluster) []*operato

defer func() {
if l.cacheRegions.len() < 0 {
log.Fatal("cache overflow", zap.String("scheduler", l.GetName()))
log.Fatal("cache overflow", zap.String("scheduler", l.GetName()), errs.ZapError(errs.ErrCacheOverflow))
}
l.cacheRegions.head = head + 1
l.lastKey = r2.GetStartKey()
Expand Down Expand Up @@ -262,10 +262,10 @@ func (l *balanceAdjacentRegionScheduler) unsafeToBalance(cluster opt.Cluster, re
if !opt.IsRegionReplicated(cluster, region) {
return true
}
storeID := region.GetLeader().GetStoreId()
store := cluster.GetStore(storeID)
leaderStoreID := region.GetLeader().GetStoreId()
store := cluster.GetStore(leaderStoreID)
if store == nil {
log.Error("failed to get the store", zap.Uint64("store-id", storeID))
log.Error("failed to get the store", zap.Uint64("store-id", leaderStoreID), errs.ZapError(errs.ErrGetSourceStore))
return true
}
s := l.selector.SelectSource(cluster, []*core.StoreInfo{store})
Expand Down Expand Up @@ -300,7 +300,7 @@ func (l *balanceAdjacentRegionScheduler) disperseLeader(cluster opt.Cluster, bef
}
op, err := operator.CreateTransferLeaderOperator("balance-adjacent-leader", cluster, before, before.GetLeader().GetStoreId(), target.GetID(), operator.OpAdjacent)
if err != nil {
log.Debug("fail to create transfer leader operator", zap.Error(err))
log.Debug("fail to create transfer leader operator", errs.ZapError(err))
return nil
}
op.SetPriorityLevel(core.LowPriority)
Expand All @@ -317,7 +317,7 @@ func (l *balanceAdjacentRegionScheduler) dispersePeer(cluster opt.Cluster, regio
stores := cluster.GetRegionStores(region)
source := cluster.GetStore(leaderStoreID)
if source == nil {
log.Error("failed to get the source store", zap.Uint64("store-id", leaderStoreID))
log.Error("failed to get the source store", zap.Uint64("store-id", leaderStoreID), errs.ZapError(errs.ErrGetSourceStore))
return nil
}
var scoreGuard filter.Filter
Expand Down
14 changes: 7 additions & 7 deletions server/schedulers/balance_leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import (
"strconv"

"github.com/pingcap/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server/core"
"github.com/tikv/pd/server/schedule"
"github.com/tikv/pd/server/schedule/filter"
Expand All @@ -42,11 +42,11 @@ func init() {
return func(v interface{}) error {
conf, ok := v.(*balanceLeaderSchedulerConfig)
if !ok {
return ErrScheduleConfigNotExist
return errs.ErrScheduleConfigNotExist.FastGenByArgs()
}
ranges, err := getKeyRanges(args)
if err != nil {
return errors.WithStack(err)
return err
}
conf.Ranges = ranges
conf.Name = BalanceLeaderName
Expand Down Expand Up @@ -78,7 +78,7 @@ type balanceLeaderScheduler struct {

// newBalanceLeaderScheduler creates a scheduler that tends to keep leaders on
// each store balanced.
func newBalanceLeaderScheduler(opController *schedule.OperatorController, conf *balanceLeaderSchedulerConfig, opts ...BalanceLeaderCreateOption) schedule.Scheduler {
func newBalanceLeaderScheduler(opController *schedule.OperatorController, conf *balanceLeaderSchedulerConfig, options ...BalanceLeaderCreateOption) schedule.Scheduler {
base := NewBaseScheduler(opController)

s := &balanceLeaderScheduler{
Expand All @@ -87,8 +87,8 @@ func newBalanceLeaderScheduler(opController *schedule.OperatorController, conf *
opController: opController,
counter: balanceLeaderCounter,
}
for _, opt := range opts {
opt(s)
for _, option := range options {
option(s)
}
s.filters = []filter.Filter{
filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true},
Expand Down Expand Up @@ -280,7 +280,7 @@ func (l *balanceLeaderScheduler) createOperator(cluster opt.Cluster, region *cor

op, err := operator.CreateTransferLeaderOperator(BalanceLeaderType, cluster, region, region.GetLeader().GetStoreId(), targetID, operator.OpBalance)
if err != nil {
log.Debug("fail to create balance leader operator", zap.Error(err))
log.Debug("fail to create balance leader operator", errs.ZapError(err))
return nil
}
sourceLabel := strconv.FormatUint(sourceID, 10)
Expand Down
8 changes: 4 additions & 4 deletions server/schedulers/balance_region.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import (

"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server/core"
"github.com/tikv/pd/server/schedule"
"github.com/tikv/pd/server/schedule/checker"
Expand All @@ -35,11 +35,11 @@ func init() {
return func(v interface{}) error {
conf, ok := v.(*balanceRegionSchedulerConfig)
if !ok {
return ErrScheduleConfigNotExist
return errs.ErrScheduleConfigNotExist.FastGenByArgs()
}
ranges, err := getKeyRanges(args)
if err != nil {
return errors.WithStack(err)
return err
}
conf.Ranges = ranges
conf.Name = BalanceRegionName
Expand Down Expand Up @@ -191,7 +191,7 @@ func (s *balanceRegionScheduler) transferPeer(cluster opt.Cluster, region *core.
sourceStoreID := oldPeer.GetStoreId()
source := cluster.GetStore(sourceStoreID)
if source == nil {
log.Error("failed to get the source store", zap.Uint64("store-id", sourceStoreID))
log.Error("failed to get the source store", zap.Uint64("store-id", sourceStoreID), errs.ZapError(errs.ErrGetSourceStore))
return nil
}
exclude := make(map[uint64]struct{})
Expand Down
3 changes: 2 additions & 1 deletion server/schedulers/base_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

"github.com/pingcap/log"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/server/schedule"
"github.com/tikv/pd/server/schedule/opt"
)
Expand Down Expand Up @@ -50,7 +51,7 @@ func intervalGrow(x time.Duration, maxInterval time.Duration, typ intervalGrowth
case zeroGrowth:
return x
default:
log.Fatal("unknown interval growth type")
log.Fatal("type error", errs.ZapError(errs.ErrInternalGrowth))
}
return 0
}
Expand Down
Loading

0 comments on commit 30e7167

Please sign in to comment.