From 9e1b9eb6305b2f3768fb9959b86e17a78fb75ef2 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Tue, 14 May 2024 08:49:13 +0800 Subject: [PATCH] Fix the issue that leader change with new leader info cause invalidStore state and lead to unnecessary backoff (#1115) (#1337) * Fix the issue that leader change with nea leader info cause invalidStore state and lead to unnecessary backoff (#1115) * Fix the issue that leader change with nea leader info cause invalidStore state and lead to unnecessary backoff Signed-off-by: MyonKeminta * address comments Signed-off-by: MyonKeminta --------- Signed-off-by: MyonKeminta Co-authored-by: MyonKeminta * Fix build on go1.18 Signed-off-by: MyonKeminta --------- Signed-off-by: MyonKeminta Co-authored-by: MyonKeminta --- internal/locate/region_request.go | 15 ++++- internal/locate/region_request3_test.go | 76 +++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go index 514c76e17d..a072e13835 100644 --- a/internal/locate/region_request.go +++ b/internal/locate/region_request.go @@ -40,11 +40,13 @@ import ( "fmt" "math" "math/rand" + "reflect" "strconv" "strings" "sync" "sync/atomic" "time" + "unsafe" "go.uber.org/zap" "google.golang.org/grpc/codes" @@ -1005,6 +1007,17 @@ func (s *replicaSelector) proxyReplica() *replica { return nil } +// sliceIdentical checks whether two slices are referencing the same block of memory. Two `nil`s are also considered +// the same. +func sliceIdentical[T any](a, b []T) bool { + if len(a) != len(b) { + return false + } + aHeader := (*reflect.SliceHeader)(unsafe.Pointer(&a)) + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + return aHeader.Data == bHeader.Data +} + func (s *replicaSelector) refreshRegionStore() { oldRegionStore := s.regionStore newRegionStore := s.region.getStore() @@ -1017,7 +1030,7 @@ func (s *replicaSelector) refreshRegionStore() { // So we just compare the address here. // When stores change, we mark this replicaSelector as invalid to let the caller // recreate a new replicaSelector. - if &oldRegionStore.stores != &newRegionStore.stores { + if !sliceIdentical(oldRegionStore.stores, newRegionStore.stores) { s.state = &invalidStore{} return } diff --git a/internal/locate/region_request3_test.go b/internal/locate/region_request3_test.go index 2cb73d0597..fdd5ae02b2 100644 --- a/internal/locate/region_request3_test.go +++ b/internal/locate/region_request3_test.go @@ -137,6 +137,82 @@ func (s *testRegionRequestToThreeStoresSuite) TestSwitchPeerWhenNoLeader() { resp, _, err := s.regionRequestSender.SendReq(bo, req, loc.Region, time.Second) s.Nil(err) s.NotNil(resp) + s.Nil(resp.GetRegionError()) +} + +func (s *testRegionRequestToThreeStoresSuite) TestSwitchPeerWhenNoLeaderErrorWithNewLeaderInfo() { + cnt := 0 + var location *KeyLocation + cli := &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) { + cnt++ + switch cnt { + case 1: + region := s.cache.GetCachedRegionWithRLock(location.Region) + s.NotNil(region) + leaderPeerIdx := int(region.getStore().workTiKVIdx) + peers := region.meta.Peers + // return no leader with new leader info + response = &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{ + RegionError: &errorpb.Error{NotLeader: &errorpb.NotLeader{ + RegionId: req.RegionId, + Leader: peers[(leaderPeerIdx+1)%len(peers)], + }}, + }} + case 2: + response = &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{ + Value: []byte("a"), + }} + default: + return nil, fmt.Errorf("unexpected request") + } + return response, err + }} + + req := tikvrpc.NewRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("a")}, kvrpcpb.Context{}) + req.ReplicaReadType = kv.ReplicaReadLeader + var err error + location, err = s.cache.LocateKey(s.bo, []byte("a")) + s.Nil(err) + s.NotNil(location) + bo := retry.NewBackoffer(context.Background(), 1000) + resp, _, err := NewRegionRequestSender(s.cache, cli).SendReqCtx(bo, req, location.Region, time.Second, tikvrpc.TiKV) + s.Nil(err) + s.NotNil(resp) + regionErr, err := resp.GetRegionError() + s.Nil(err) + s.Nil(regionErr) + // It's unreasoneable to retry in upper layer, such as cop request, the upper layer will need to rebuild cop request and retry, there are some unnecessary overhead. + s.Equal(cnt, 2) + r := s.cache.GetCachedRegionWithRLock(location.Region) + s.True(r.isValid()) +} + +func (s *testRegionRequestToThreeStoresSuite) TestSliceIdentical() { + a := make([]int, 0) + b := a + s.True(sliceIdentical(a, b)) + // This is not guaranteed. + // b = make([]int, 0) + // s.False(sliceIdentical(a, b)) + + a = append(a, 1, 2, 3) + b = a + s.True(sliceIdentical(a, b)) + b = a[:2] + s.False(sliceIdentical(a, b)) + b = a[1:] + s.False(sliceIdentical(a, b)) + a = a[1:] + s.True(sliceIdentical(a, b)) + + a = nil + b = nil + + s.True(sliceIdentical(a, b)) + a = make([]int, 0) + s.False(sliceIdentical(a, b)) + a = append(a, 1) + s.False(sliceIdentical(a, b)) } func (s *testRegionRequestToThreeStoresSuite) loadAndGetLeaderStore() (*Store, string) {