Skip to content

Commit 768e970

Browse files
committed
This is an automated cherry-pick of #3787
Signed-off-by: Ryan Leung <[email protected]>
1 parent b6ac46c commit 768e970

File tree

6 files changed

+93
-27
lines changed

6 files changed

+93
-27
lines changed

server/api/scheduler.go

+8-14
Original file line numberDiff line numberDiff line change
@@ -252,15 +252,11 @@ func (h *schedulerHandler) Delete(w http.ResponseWriter, r *http.Request) {
252252
name := mux.Vars(r)["name"]
253253
switch {
254254
case strings.HasPrefix(name, schedulers.EvictLeaderName) && name != schedulers.EvictLeaderName:
255-
if err := h.redirectSchedulerDelete(name, schedulers.EvictLeaderName); err != nil {
256-
h.handleErr(w, err)
257-
return
258-
}
255+
h.redirectSchedulerDelete(w, name, schedulers.EvictLeaderName)
256+
return
259257
case strings.HasPrefix(name, schedulers.GrantLeaderName) && name != schedulers.GrantLeaderName:
260-
if err := h.redirectSchedulerDelete(name, schedulers.GrantLeaderName); err != nil {
261-
h.handleErr(w, err)
262-
return
263-
}
258+
h.redirectSchedulerDelete(w, name, schedulers.GrantLeaderName)
259+
return
264260
default:
265261
if err := h.RemoveScheduler(name); err != nil {
266262
h.handleErr(w, err)
@@ -278,18 +274,16 @@ func (h *schedulerHandler) handleErr(w http.ResponseWriter, err error) {
278274
}
279275
}
280276

281-
func (h *schedulerHandler) redirectSchedulerDelete(name, schedulerName string) error {
277+
func (h *schedulerHandler) redirectSchedulerDelete(w http.ResponseWriter, name, schedulerName string) {
282278
args := strings.Split(name, "-")
283279
args = args[len(args)-1:]
284280
url := fmt.Sprintf("%s/%s/%s/delete/%s", h.GetAddr(), schedulerConfigPrefix, schedulerName, args[0])
285281
resp, err := doDelete(h.svr.GetHTTPClient(), url)
286282
if err != nil {
287-
return err
288-
}
289-
if resp.StatusCode != http.StatusOK {
290-
return errs.ErrSchedulerNotFound.FastGenByArgs()
283+
h.r.JSON(w, resp.StatusCode, err.Error())
284+
return
291285
}
292-
return nil
286+
h.r.JSON(w, resp.StatusCode, nil)
293287
}
294288

295289
// FIXME: details of input json body params

server/api/scheduler_test.go

+21-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"time"
2020

2121
. "github.com/pingcap/check"
22+
"github.com/pingcap/failpoint"
2223
"github.com/pingcap/kvproto/pkg/metapb"
2324
"github.com/tikv/pd/server"
2425
_ "github.com/tikv/pd/server/schedulers"
@@ -56,16 +57,27 @@ func (s *testScheduleSuite) TestOriginAPI(c *C) {
5657
body, err := json.Marshal(input)
5758
c.Assert(err, IsNil)
5859
c.Assert(postJSON(testDialClient, addURL, body), IsNil)
60+
rc := s.svr.GetRaftCluster()
61+
c.Assert(rc.GetSchedulers(), HasLen, 1)
62+
resp := make(map[string]interface{})
63+
listURL := fmt.Sprintf("%s%s%s/%s/list", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler")
64+
c.Assert(readJSON(testDialClient, listURL, &resp), IsNil)
65+
c.Assert(resp["store-id-ranges"], HasLen, 1)
5966
input1 := make(map[string]interface{})
6067
input1["name"] = "evict-leader-scheduler"
6168
input1["store_id"] = 2
6269
body, err = json.Marshal(input1)
6370
c.Assert(err, IsNil)
71+
c.Assert(failpoint.Enable("github.com/tikv/pd/server/schedulers/persistFail", "return(true)"), IsNil)
72+
c.Assert(postJSON(testDialClient, addURL, body), NotNil)
73+
c.Assert(rc.GetSchedulers(), HasLen, 1)
74+
resp = make(map[string]interface{})
75+
c.Assert(readJSON(testDialClient, listURL, &resp), IsNil)
76+
c.Assert(resp["store-id-ranges"], HasLen, 1)
77+
c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedulers/persistFail"), IsNil)
6478
c.Assert(postJSON(testDialClient, addURL, body), IsNil)
65-
rc := s.svr.GetRaftCluster()
6679
c.Assert(rc.GetSchedulers(), HasLen, 1)
67-
resp := make(map[string]interface{})
68-
listURL := fmt.Sprintf("%s%s%s/%s/list", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler")
80+
resp = make(map[string]interface{})
6981
c.Assert(readJSON(testDialClient, listURL, &resp), IsNil)
7082
c.Assert(resp["store-id-ranges"], HasLen, 2)
7183
deleteURL := fmt.Sprintf("%s/%s", s.urlPrefix, "evict-leader-scheduler-1")
@@ -76,8 +88,14 @@ func (s *testScheduleSuite) TestOriginAPI(c *C) {
7688
c.Assert(readJSON(testDialClient, listURL, &resp1), IsNil)
7789
c.Assert(resp1["store-id-ranges"], HasLen, 1)
7890
deleteURL = fmt.Sprintf("%s/%s", s.urlPrefix, "evict-leader-scheduler-2")
91+
c.Assert(failpoint.Enable("github.com/tikv/pd/server/config/persistFail", "return(true)"), IsNil)
7992
res, err := doDelete(testDialClient, deleteURL)
8093
c.Assert(err, IsNil)
94+
c.Assert(res.StatusCode, Equals, 500)
95+
c.Assert(rc.GetSchedulers(), HasLen, 1)
96+
c.Assert(failpoint.Disable("github.com/tikv/pd/server/config/persistFail"), IsNil)
97+
res, err = doDelete(testDialClient, deleteURL)
98+
c.Assert(err, IsNil)
8199
c.Assert(res.StatusCode, Equals, 200)
82100
c.Assert(rc.GetSchedulers(), HasLen, 0)
83101
resp2 := make(map[string]interface{})

server/cluster/coordinator.go

+7-9
Original file line numberDiff line numberDiff line change
@@ -563,28 +563,26 @@ func (c *coordinator) removeScheduler(name string) error {
563563
return errs.ErrSchedulerNotFound.FastGenByArgs()
564564
}
565565

566-
s.Stop()
567-
schedulerStatusGauge.WithLabelValues(name, "allow").Set(0)
568-
delete(c.schedulers, name)
569-
570-
var err error
571566
opt := c.cluster.opt
572-
573-
if err = opt.RemoveSchedulerCfg(s.Ctx(), name); err != nil {
567+
if err := opt.RemoveSchedulerCfg(s.Ctx(), name); err != nil {
574568
log.Error("can not remove scheduler", zap.String("scheduler-name", name), errs.ZapError(err))
575569
return err
576570
}
577571

578-
if err = opt.Persist(c.cluster.storage); err != nil {
572+
if err := opt.Persist(c.cluster.storage); err != nil {
579573
log.Error("the option can not persist scheduler config", errs.ZapError(err))
580574
return err
581575
}
582576

583-
if err = c.cluster.storage.RemoveScheduleConfig(name); err != nil {
577+
if err := c.cluster.storage.RemoveScheduleConfig(name); err != nil {
584578
log.Error("can not remove the scheduler config", errs.ZapError(err))
585579
return err
586580
}
587581

582+
s.Stop()
583+
schedulerStatusGauge.WithLabelValues(name, "allow").Set(0)
584+
delete(c.schedulers, name)
585+
588586
return nil
589587
}
590588

server/config/persist_options.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
"unsafe"
2525

2626
"github.com/coreos/go-semver/semver"
27+
"github.com/pingcap/errors"
28+
"github.com/pingcap/failpoint"
2729
"github.com/pingcap/kvproto/pkg/metapb"
2830
"github.com/pingcap/log"
2931
"github.com/tikv/pd/pkg/cache"
@@ -572,7 +574,11 @@ func (o *PersistOptions) Persist(storage *core.Storage) error {
572574
LabelProperty: o.GetLabelPropertyConfig(),
573575
ClusterVersion: *o.GetClusterVersion(),
574576
}
575-
return storage.SaveConfig(cfg)
577+
err := storage.SaveConfig(cfg)
578+
failpoint.Inject("persistFail", func() {
579+
err = errors.New("fail to persist")
580+
})
581+
return err
576582
}
577583

578584
// Reload reloads the configuration from the storage.

server/schedulers/evict_leader.go

+27
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020

2121
"github.com/gorilla/mux"
2222
"github.com/pingcap/errors"
23+
"github.com/pingcap/failpoint"
2324
"github.com/pingcap/log"
2425
"github.com/tikv/pd/pkg/apiutil"
2526
"github.com/tikv/pd/pkg/errs"
@@ -117,6 +118,9 @@ func (conf *evictLeaderSchedulerConfig) Persist() error {
117118
conf.mu.RLock()
118119
defer conf.mu.RUnlock()
119120
data, err := schedule.EncodeConfig(conf)
121+
failpoint.Inject("persistFail", func() {
122+
err = errors.New("fail to persist")
123+
})
120124
if err != nil {
121125
return err
122126
}
@@ -153,6 +157,22 @@ func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last
153157
return succ, last
154158
}
155159

160+
func (conf *evictLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) {
161+
conf.mu.Lock()
162+
defer conf.mu.Unlock()
163+
conf.cluster.BlockStore(id)
164+
conf.StoreIDWithRanges[id] = keyRange
165+
}
166+
167+
func (conf *evictLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange {
168+
conf.mu.RLock()
169+
defer conf.mu.RUnlock()
170+
if ranges, exist := conf.StoreIDWithRanges[id]; exist {
171+
return ranges
172+
}
173+
return nil
174+
}
175+
156176
type evictLeaderScheduler struct {
157177
*BaseScheduler
158178
conf *evictLeaderSchedulerConfig
@@ -301,12 +321,15 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R
301321
idFloat, ok := input["store_id"].(float64)
302322
if ok {
303323
id = (uint64)(idFloat)
324+
handler.config.mu.RLock()
304325
if _, exists = handler.config.StoreIDWithRanges[id]; !exists {
305326
if err := handler.config.cluster.BlockStore(id); err != nil {
327+
handler.config.mu.RUnlock()
306328
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
307329
return
308330
}
309331
}
332+
handler.config.mu.RUnlock()
310333
args = append(args, strconv.FormatUint(id, 10))
311334
}
312335

@@ -320,6 +343,7 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R
320343
handler.config.BuildWithArgs(args)
321344
err := handler.config.Persist()
322345
if err != nil {
346+
handler.config.removeStore(id)
323347
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
324348
return
325349
}
@@ -340,10 +364,12 @@ func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R
340364
}
341365

342366
var resp interface{}
367+
keyRanges := handler.config.getKeyRangesByID(id)
343368
succ, last := handler.config.removeStore(id)
344369
if succ {
345370
err = handler.config.Persist()
346371
if err != nil {
372+
handler.config.resetStore(id, keyRanges)
347373
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
348374
return
349375
}
@@ -352,6 +378,7 @@ func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R
352378
if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) {
353379
handler.rd.JSON(w, http.StatusNotFound, err)
354380
} else {
381+
handler.config.resetStore(id, keyRanges)
355382
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
356383
}
357384
return

server/schedulers/grant_leader.go

+23
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,22 @@ func (conf *grantLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last
147147
return succ, last
148148
}
149149

150+
func (conf *grantLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) {
151+
conf.mu.Lock()
152+
defer conf.mu.Unlock()
153+
conf.cluster.BlockStore(id)
154+
conf.StoreIDWithRanges[id] = keyRange
155+
}
156+
157+
func (conf *grantLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange {
158+
conf.mu.RLock()
159+
defer conf.mu.RUnlock()
160+
if ranges, exist := conf.StoreIDWithRanges[id]; exist {
161+
return ranges
162+
}
163+
return nil
164+
}
165+
150166
// grantLeaderScheduler transfers all leaders to peers in the store.
151167
type grantLeaderScheduler struct {
152168
*BaseScheduler
@@ -251,12 +267,15 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R
251267
idFloat, ok := input["store_id"].(float64)
252268
if ok {
253269
id = (uint64)(idFloat)
270+
handler.config.mu.RLock()
254271
if _, exists = handler.config.StoreIDWithRanges[id]; !exists {
255272
if err := handler.config.cluster.BlockStore(id); err != nil {
273+
handler.config.mu.RUnlock()
256274
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
257275
return
258276
}
259277
}
278+
handler.config.mu.RUnlock()
260279
args = append(args, strconv.FormatUint(id, 10))
261280
}
262281

@@ -270,6 +289,7 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R
270289
handler.config.BuildWithArgs(args)
271290
err := handler.config.Persist()
272291
if err != nil {
292+
handler.config.removeStore(id)
273293
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
274294
return
275295
}
@@ -290,10 +310,12 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R
290310
}
291311

292312
var resp interface{}
313+
keyRanges := handler.config.getKeyRangesByID(id)
293314
succ, last := handler.config.removeStore(id)
294315
if succ {
295316
err = handler.config.Persist()
296317
if err != nil {
318+
handler.config.resetStore(id, keyRanges)
297319
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
298320
return
299321
}
@@ -302,6 +324,7 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R
302324
if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) {
303325
handler.rd.JSON(w, http.StatusNotFound, err)
304326
} else {
327+
handler.config.resetStore(id, keyRanges)
305328
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
306329
}
307330
return

0 commit comments

Comments
 (0)