From 768e97026d7c9e5e3891074dfd44e204b3fb196a Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Wed, 30 Jun 2021 19:23:26 +0800 Subject: [PATCH] This is an automated cherry-pick of #3787 Signed-off-by: Ryan Leung --- server/api/scheduler.go | 22 ++++++++-------------- server/api/scheduler_test.go | 24 +++++++++++++++++++++--- server/cluster/coordinator.go | 16 +++++++--------- server/config/persist_options.go | 8 +++++++- server/schedulers/evict_leader.go | 27 +++++++++++++++++++++++++++ server/schedulers/grant_leader.go | 23 +++++++++++++++++++++++ 6 files changed, 93 insertions(+), 27 deletions(-) diff --git a/server/api/scheduler.go b/server/api/scheduler.go index e064273c96d..2ba790b2c41 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -252,15 +252,11 @@ func (h *schedulerHandler) Delete(w http.ResponseWriter, r *http.Request) { name := mux.Vars(r)["name"] switch { case strings.HasPrefix(name, schedulers.EvictLeaderName) && name != schedulers.EvictLeaderName: - if err := h.redirectSchedulerDelete(name, schedulers.EvictLeaderName); err != nil { - h.handleErr(w, err) - return - } + h.redirectSchedulerDelete(w, name, schedulers.EvictLeaderName) + return case strings.HasPrefix(name, schedulers.GrantLeaderName) && name != schedulers.GrantLeaderName: - if err := h.redirectSchedulerDelete(name, schedulers.GrantLeaderName); err != nil { - h.handleErr(w, err) - return - } + h.redirectSchedulerDelete(w, name, schedulers.GrantLeaderName) + return default: if err := h.RemoveScheduler(name); err != nil { h.handleErr(w, err) @@ -278,18 +274,16 @@ func (h *schedulerHandler) handleErr(w http.ResponseWriter, err error) { } } -func (h *schedulerHandler) redirectSchedulerDelete(name, schedulerName string) error { +func (h *schedulerHandler) redirectSchedulerDelete(w http.ResponseWriter, name, schedulerName string) { args := strings.Split(name, "-") args = args[len(args)-1:] url := fmt.Sprintf("%s/%s/%s/delete/%s", h.GetAddr(), schedulerConfigPrefix, schedulerName, args[0]) resp, err := doDelete(h.svr.GetHTTPClient(), url) if err != nil { - return err - } - if resp.StatusCode != http.StatusOK { - return errs.ErrSchedulerNotFound.FastGenByArgs() + h.r.JSON(w, resp.StatusCode, err.Error()) + return } - return nil + h.r.JSON(w, resp.StatusCode, nil) } // FIXME: details of input json body params diff --git a/server/api/scheduler_test.go b/server/api/scheduler_test.go index e4e98ca4bd2..c2ef036d27c 100644 --- a/server/api/scheduler_test.go +++ b/server/api/scheduler_test.go @@ -19,6 +19,7 @@ import ( "time" . "github.com/pingcap/check" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/server" _ "github.com/tikv/pd/server/schedulers" @@ -56,16 +57,27 @@ func (s *testScheduleSuite) TestOriginAPI(c *C) { body, err := json.Marshal(input) c.Assert(err, IsNil) c.Assert(postJSON(testDialClient, addURL, body), IsNil) + rc := s.svr.GetRaftCluster() + c.Assert(rc.GetSchedulers(), HasLen, 1) + resp := make(map[string]interface{}) + listURL := fmt.Sprintf("%s%s%s/%s/list", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler") + c.Assert(readJSON(testDialClient, listURL, &resp), IsNil) + c.Assert(resp["store-id-ranges"], HasLen, 1) input1 := make(map[string]interface{}) input1["name"] = "evict-leader-scheduler" input1["store_id"] = 2 body, err = json.Marshal(input1) c.Assert(err, IsNil) + c.Assert(failpoint.Enable("github.com/tikv/pd/server/schedulers/persistFail", "return(true)"), IsNil) + c.Assert(postJSON(testDialClient, addURL, body), NotNil) + c.Assert(rc.GetSchedulers(), HasLen, 1) + resp = make(map[string]interface{}) + c.Assert(readJSON(testDialClient, listURL, &resp), IsNil) + c.Assert(resp["store-id-ranges"], HasLen, 1) + c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedulers/persistFail"), IsNil) c.Assert(postJSON(testDialClient, addURL, body), IsNil) - rc := s.svr.GetRaftCluster() c.Assert(rc.GetSchedulers(), HasLen, 1) - resp := make(map[string]interface{}) - listURL := fmt.Sprintf("%s%s%s/%s/list", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler") + resp = make(map[string]interface{}) c.Assert(readJSON(testDialClient, listURL, &resp), IsNil) c.Assert(resp["store-id-ranges"], HasLen, 2) deleteURL := fmt.Sprintf("%s/%s", s.urlPrefix, "evict-leader-scheduler-1") @@ -76,8 +88,14 @@ func (s *testScheduleSuite) TestOriginAPI(c *C) { c.Assert(readJSON(testDialClient, listURL, &resp1), IsNil) c.Assert(resp1["store-id-ranges"], HasLen, 1) deleteURL = fmt.Sprintf("%s/%s", s.urlPrefix, "evict-leader-scheduler-2") + c.Assert(failpoint.Enable("github.com/tikv/pd/server/config/persistFail", "return(true)"), IsNil) res, err := doDelete(testDialClient, deleteURL) c.Assert(err, IsNil) + c.Assert(res.StatusCode, Equals, 500) + c.Assert(rc.GetSchedulers(), HasLen, 1) + c.Assert(failpoint.Disable("github.com/tikv/pd/server/config/persistFail"), IsNil) + res, err = doDelete(testDialClient, deleteURL) + c.Assert(err, IsNil) c.Assert(res.StatusCode, Equals, 200) c.Assert(rc.GetSchedulers(), HasLen, 0) resp2 := make(map[string]interface{}) diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 2714e3e1b8a..abecd2038a2 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -563,28 +563,26 @@ func (c *coordinator) removeScheduler(name string) error { return errs.ErrSchedulerNotFound.FastGenByArgs() } - s.Stop() - schedulerStatusGauge.WithLabelValues(name, "allow").Set(0) - delete(c.schedulers, name) - - var err error opt := c.cluster.opt - - if err = opt.RemoveSchedulerCfg(s.Ctx(), name); err != nil { + if err := opt.RemoveSchedulerCfg(s.Ctx(), name); err != nil { log.Error("can not remove scheduler", zap.String("scheduler-name", name), errs.ZapError(err)) return err } - if err = opt.Persist(c.cluster.storage); err != nil { + if err := opt.Persist(c.cluster.storage); err != nil { log.Error("the option can not persist scheduler config", errs.ZapError(err)) return err } - if err = c.cluster.storage.RemoveScheduleConfig(name); err != nil { + if err := c.cluster.storage.RemoveScheduleConfig(name); err != nil { log.Error("can not remove the scheduler config", errs.ZapError(err)) return err } + s.Stop() + schedulerStatusGauge.WithLabelValues(name, "allow").Set(0) + delete(c.schedulers, name) + return nil } diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 5acd835d7e2..e71b9d9b1ea 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -24,6 +24,8 @@ import ( "unsafe" "github.com/coreos/go-semver/semver" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/cache" @@ -572,7 +574,11 @@ func (o *PersistOptions) Persist(storage *core.Storage) error { LabelProperty: o.GetLabelPropertyConfig(), ClusterVersion: *o.GetClusterVersion(), } - return storage.SaveConfig(cfg) + err := storage.SaveConfig(cfg) + failpoint.Inject("persistFail", func() { + err = errors.New("fail to persist") + }) + return err } // Reload reloads the configuration from the storage. diff --git a/server/schedulers/evict_leader.go b/server/schedulers/evict_leader.go index 46cf3d359b3..e6c57f6a685 100644 --- a/server/schedulers/evict_leader.go +++ b/server/schedulers/evict_leader.go @@ -20,6 +20,7 @@ import ( "github.com/gorilla/mux" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/tikv/pd/pkg/apiutil" "github.com/tikv/pd/pkg/errs" @@ -117,6 +118,9 @@ func (conf *evictLeaderSchedulerConfig) Persist() error { conf.mu.RLock() defer conf.mu.RUnlock() data, err := schedule.EncodeConfig(conf) + failpoint.Inject("persistFail", func() { + err = errors.New("fail to persist") + }) if err != nil { return err } @@ -153,6 +157,22 @@ func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last return succ, last } +func (conf *evictLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) { + conf.mu.Lock() + defer conf.mu.Unlock() + conf.cluster.BlockStore(id) + conf.StoreIDWithRanges[id] = keyRange +} + +func (conf *evictLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange { + conf.mu.RLock() + defer conf.mu.RUnlock() + if ranges, exist := conf.StoreIDWithRanges[id]; exist { + return ranges + } + return nil +} + type evictLeaderScheduler struct { *BaseScheduler conf *evictLeaderSchedulerConfig @@ -301,12 +321,15 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R idFloat, ok := input["store_id"].(float64) if ok { id = (uint64)(idFloat) + handler.config.mu.RLock() if _, exists = handler.config.StoreIDWithRanges[id]; !exists { if err := handler.config.cluster.BlockStore(id); err != nil { + handler.config.mu.RUnlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } } + handler.config.mu.RUnlock() args = append(args, strconv.FormatUint(id, 10)) } @@ -320,6 +343,7 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.config.BuildWithArgs(args) err := handler.config.Persist() if err != nil { + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -340,10 +364,12 @@ func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R } var resp interface{} + keyRanges := handler.config.getKeyRangesByID(id) succ, last := handler.config.removeStore(id) if succ { err = handler.config.Persist() if err != nil { + handler.config.resetStore(id, keyRanges) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -352,6 +378,7 @@ func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) { handler.rd.JSON(w, http.StatusNotFound, err) } else { + handler.config.resetStore(id, keyRanges) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) } return diff --git a/server/schedulers/grant_leader.go b/server/schedulers/grant_leader.go index 4a06b82950d..c2033361cea 100644 --- a/server/schedulers/grant_leader.go +++ b/server/schedulers/grant_leader.go @@ -147,6 +147,22 @@ func (conf *grantLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last return succ, last } +func (conf *grantLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) { + conf.mu.Lock() + defer conf.mu.Unlock() + conf.cluster.BlockStore(id) + conf.StoreIDWithRanges[id] = keyRange +} + +func (conf *grantLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange { + conf.mu.RLock() + defer conf.mu.RUnlock() + if ranges, exist := conf.StoreIDWithRanges[id]; exist { + return ranges + } + return nil +} + // grantLeaderScheduler transfers all leaders to peers in the store. type grantLeaderScheduler struct { *BaseScheduler @@ -251,12 +267,15 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R idFloat, ok := input["store_id"].(float64) if ok { id = (uint64)(idFloat) + handler.config.mu.RLock() if _, exists = handler.config.StoreIDWithRanges[id]; !exists { if err := handler.config.cluster.BlockStore(id); err != nil { + handler.config.mu.RUnlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } } + handler.config.mu.RUnlock() args = append(args, strconv.FormatUint(id, 10)) } @@ -270,6 +289,7 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.config.BuildWithArgs(args) err := handler.config.Persist() if err != nil { + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -290,10 +310,12 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R } var resp interface{} + keyRanges := handler.config.getKeyRangesByID(id) succ, last := handler.config.removeStore(id) if succ { err = handler.config.Persist() if err != nil { + handler.config.resetStore(id, keyRanges) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -302,6 +324,7 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) { handler.rd.JSON(w, http.StatusNotFound, err) } else { + handler.config.resetStore(id, keyRanges) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) } return