Skip to content

Commit

Permalink
handle mismatch peer id (#935)
Browse files Browse the repository at this point in the history
Signed-off-by: you06 <[email protected]>
  • Loading branch information
you06 authored Aug 14, 2023
1 parent 44f5025 commit a0ac170
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
14 changes: 14 additions & 0 deletions internal/locate/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
return "peer_is_witness"
} else if isDeadlineExceeded(e) {
return "deadline_exceeded"
} else if e.GetMismatchPeerId() != nil {
return "mismatch_peer_id"
}
return "unknown"
}
Expand Down Expand Up @@ -2173,6 +2175,18 @@ func (s *RegionRequestSender) onRegionError(
s.replicaSelector.onDeadlineExceeded()
}

if mismatch := regionErr.GetMismatchPeerId(); mismatch != nil {
logutil.Logger(bo.GetCtx()).Warn(
"tikv reports `MismatchPeerId`, invalidate region cache",
zap.Uint64("req peer id", mismatch.GetRequestPeerId()),
zap.Uint64("store peer id", mismatch.GetStorePeerId()),
)
if s.replicaSelector != nil {
s.replicaSelector.invalidateRegion()
}
return false, nil
}

logutil.Logger(bo.GetCtx()).Debug(
"tikv reports region failed",
zap.Stringer("regionErr", regionErr),
Expand Down
49 changes: 49 additions & 0 deletions internal/locate/region_request3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1376,3 +1376,52 @@ func (s *testRegionRequestToThreeStoresSuite) TestStaleReadFallback2Follower() {
}
}
}

// TestReplicaReadFallbackToLeaderRegionError checks how the request sender
// reacts when a store answers with a `MismatchPeerId` region error.
//
// The mocked client returns `MismatchPeerId` for the store at s.storeIDs[0]
// and `DataIsNotReady` for every other address. A stale-read Get request with
// follower replica read is then sent, and the test asserts that:
//   - SendReqCtx itself returns no error,
//   - the region error surfaced to the caller is labelled "mismatch_peer_id"
//     and is not an `EpochNotMatch` error,
//   - the cached region has been invalidated afterwards.
func (s *testRegionRequestToThreeStoresSuite) TestReplicaReadFallbackToLeaderRegionError() {
	regionLoc, err := s.cache.LocateRegionByID(s.bo, s.regionID)
	s.Nil(err)
	s.NotNil(regionLoc)

	s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
		// Bail out early if the caller's context has already been cancelled.
		select {
		case <-ctx.Done():
			return nil, errors.New("timeout")
		default:
		}
		// Return `mismatch peer id` when the leader is accessed.
		if addr == s.cluster.GetStore(s.storeIDs[0]).Address {
			return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{
				MismatchPeerId: &errorpb.MismatchPeerId{
					RequestPeerId: 1,
					StorePeerId:   2,
				},
			}}}, nil
		}
		// Every other store reports that its data is not ready.
		return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{
			DataIsNotReady: &errorpb.DataIsNotReady{},
		}}}, nil
	}}

	// Keep a handle on the cached region so its validity can be re-checked
	// after the request has been processed.
	region := s.cache.getRegionByIDFromCache(regionLoc.Region.GetID())
	s.True(region.isValid())

	req := tikvrpc.NewReplicaReadRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")}, kv.ReplicaReadLeader, nil)
	req.ReadReplicaScope = oracle.GlobalTxnScope
	req.TxnScope = oracle.GlobalTxnScope
	req.EnableStaleRead()
	req.ReplicaReadType = kv.ReplicaReadFollower

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	bo := retry.NewBackoffer(ctx, -1)
	resp, _, _, err := s.regionRequestSender.SendReqCtx(bo, req, regionLoc.Region, time.Second, tikvrpc.TiKV)
	s.Nil(err)
	regionErr, err := resp.GetRegionError()
	s.Nil(err)
	// Expected value first, actual value second, so a failure prints a
	// correctly labelled diff.
	s.Equal("mismatch_peer_id", regionErrorToLabel(regionErr))
	// return non-epoch-not-match region error and the upper layer can auto retry.
	s.Nil(regionErr.GetEpochNotMatch())
	// after region error returned, the region should be invalidated.
	s.False(region.isValid())
}

0 comments on commit a0ac170

Please sign in to comment.