From de0cabb687ef59a9dc0bc6389a93c55b8f0020ba Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 1 Feb 2019 15:43:02 +0800 Subject: [PATCH] *: check region epoch strictly (#4125) Checks region epoch strictly to avoid KeyNotInRegion errors. A 3 nodes TiKV cluster with merge enabled, after commit merge, TiKV A tells TiDB with a epoch not match error contains the latest target Region info, TiDB updates its region cache and sends requests to TiKV B, and TiKV B has not applied commit merge yet, since the region epoch in request is higher than TiKV B, the request must be denied due to epoch not match, so it does not read on a stale snapshot, thus avoid the KeyNotInRegion error. Signed-off-by: Neil Shen --- Cargo.lock | 2 +- components/test_raftstore/src/cluster.rs | 4 +- components/test_raftstore/src/pd.rs | 2 +- src/import/errors.rs | 8 +- src/import/import.rs | 21 ++--- src/import/prepare.rs | 24 +++--- src/raftstore/errors.rs | 14 ++-- src/raftstore/store/fsm/apply.rs | 31 ++++--- src/raftstore/store/fsm/peer.rs | 8 +- src/raftstore/store/local_metrics.rs | 12 +-- src/raftstore/store/peer.rs | 2 +- src/raftstore/store/util.rs | 50 ++++++++---- src/raftstore/store/worker/read.rs | 2 +- src/storage/engine/metrics.rs | 4 +- src/storage/mod.rs | 8 +- tests/integrations/raftstore/test_multi.rs | 2 +- tests/integrations/raftstore/test_single.rs | 2 +- .../raftstore/test_split_region.rs | 80 ++++++++++++------- 18 files changed, 161 insertions(+), 115 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea8dc3dd6ab..4b30cd4c223 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -890,7 +890,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.1" -source = "git+https://github.com/pingcap/kvproto.git#be0b43ee9241370928a23ff20e5ddd4dd11e274d" +source = "git+https://github.com/pingcap/kvproto.git#7e329e0c9e322cb1aad084db67267d4d5508081f" dependencies = [ "futures 0.1.23 (registry+https://github.com/rust-lang/crates.io-index)", "grpcio 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 1ab05a99de0..b8bc474fea5 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -567,7 +567,7 @@ impl Cluster { } let resp = result.unwrap(); - if resp.get_header().get_error().has_stale_epoch() { + if resp.get_header().get_error().has_epoch_not_match() { warn!("seems split, let's retry"); sleep_ms(100); continue; @@ -832,7 +832,7 @@ impl Cluster { let mut resp = write_resp.response; if resp.get_header().has_error() { let error = resp.get_header().get_error(); - if error.has_stale_epoch() + if error.has_epoch_not_match() || error.has_not_leader() || error.has_stale_command() { diff --git a/components/test_raftstore/src/pd.rs b/components/test_raftstore/src/pd.rs index 7c1153ca612..df5cdb7bf6e 100644 --- a/components/test_raftstore/src/pd.rs +++ b/components/test_raftstore/src/pd.rs @@ -610,7 +610,7 @@ fn check_stale_region(region: &metapb::Region, check_region: &metapb::Region) -> } Err(box_err!( - "stale epoch {:?}, we are now {:?}", + "epoch not match {:?}, we are now {:?}", check_epoch, epoch )) diff --git a/src/import/errors.rs b/src/import/errors.rs index d1dd90f0827..38e07754fc9 100644 --- a/src/import/errors.rs +++ b/src/import/errors.rs @@ -98,7 +98,7 @@ quick_error! { display("TikvRPC {:?}", err) } NotLeader(new_leader: Option) {} - StaleEpoch(new_regions: Vec) {} + EpochNotMatch(current_regions: Vec) {} UpdateRegion(new_region: RegionInfo) {} ImportJobFailed(tag: String) { display("{}", tag) @@ -124,9 +124,9 @@ impl From for Error { } else { Error::NotLeader(None) } - } else if err.has_stale_epoch() { - let mut error = err.take_stale_epoch(); - Error::StaleEpoch(error.take_new_regions().to_vec()) + } else if err.has_epoch_not_match() { + let mut error = err.take_epoch_not_match(); + Error::EpochNotMatch(error.take_current_regions().to_vec()) } else { Error::TikvRPC(err) } diff --git a/src/import/import.rs b/src/import/import.rs index 1e1b75bf93d..f26baae95c4 100644 --- a/src/import/import.rs +++ b/src/import/import.rs @@ -320,21 +320,24 @@ impl ImportSSTJob { region.leader = new_leader; Err(Error::UpdateRegion(region)) } - Err(Error::StaleEpoch(new_regions)) => { - let new_region = new_regions + Err(Error::EpochNotMatch(current_regions)) => { + let current_region = current_regions .iter() .find(|&r| self.sst.inside_region(r)) .cloned(); - match new_region { - Some(new_region) => { + match current_region { + Some(current_region) => { let new_leader = region .leader - .and_then(|p| find_region_peer(&new_region, p.get_store_id())); - Err(Error::UpdateRegion(RegionInfo::new(new_region, new_leader))) + .and_then(|p| find_region_peer(¤t_region, p.get_store_id())); + Err(Error::UpdateRegion(RegionInfo::new( + current_region, + new_leader, + ))) } None => { - warn!("{} stale epoch {:?}", self.tag, new_regions); - Err(Error::StaleEpoch(new_regions)) + warn!("{} epoch not match {:?}", self.tag, current_region); + Err(Error::EpochNotMatch(current_regions)) } } } @@ -373,7 +376,7 @@ impl ImportSSTJob { Ok(()) } else { match Error::from(resp.take_error()) { - e @ Error::NotLeader(_) | e @ Error::StaleEpoch(_) => return Err(e), + e @ Error::NotLeader(_) | e @ Error::EpochNotMatch(_) => return Err(e), e => Err(e), } } diff --git a/src/import/prepare.rs b/src/import/prepare.rs index 694a8caeb10..792da77aa6e 100644 --- a/src/import/prepare.rs +++ b/src/import/prepare.rs @@ -192,18 +192,24 @@ impl PrepareRangeJob { region.leader = new_leader; Err(Error::UpdateRegion(region)) } - Err(Error::StaleEpoch(new_regions)) => { - let new_region = new_regions.iter().find(|&r| self.need_split(r)).cloned(); - match new_region { - Some(new_region) => { + Err(Error::EpochNotMatch(current_regions)) => { + let current_region = current_regions + .iter() + .find(|&r| self.need_split(r)) + .cloned(); + match current_region { + Some(current_region) => { let new_leader = region .leader - .and_then(|p| find_region_peer(&new_region, p.get_store_id())); - Err(Error::UpdateRegion(RegionInfo::new(new_region, new_leader))) + .and_then(|p| find_region_peer(¤t_region, p.get_store_id())); + Err(Error::UpdateRegion(RegionInfo::new( + current_region, + new_leader, + ))) } None => { - warn!("{} stale epoch {:?}", self.tag, new_regions); - Err(Error::StaleEpoch(new_regions)) + warn!("{} epoch not match {:?}", self.tag, current_regions); + Err(Error::EpochNotMatch(current_regions)) } } } @@ -231,7 +237,7 @@ impl PrepareRangeJob { Ok(resp) } else { match Error::from(resp.take_region_error()) { - e @ Error::NotLeader(_) | e @ Error::StaleEpoch(_) => return Err(e), + e @ Error::NotLeader(_) | e @ Error::EpochNotMatch(_) => return Err(e), e => Err(e), } } diff --git a/src/raftstore/errors.rs b/src/raftstore/errors.rs index 1b6de1ce352..839006c4310 100644 --- a/src/raftstore/errors.rs +++ b/src/raftstore/errors.rs @@ -115,9 +115,9 @@ quick_error! { description("request timeout") display("Timeout {}", msg) } - StaleEpoch(msg: String, new_regions: Vec) { - description("region is stale") - display("StaleEpoch {}", msg) + EpochNotMatch(msg: String, new_regions: Vec) { + description("region epoch is not match") + display("EpochNotMatch {}", msg) } StaleCommand { description("stale command") @@ -186,10 +186,10 @@ impl Into for Error { .mut_key_not_in_region() .set_end_key(region.get_end_key().to_vec()); } - Error::StaleEpoch(_, new_regions) => { - let mut e = errorpb::StaleEpoch::new(); - e.set_new_regions(RepeatedField::from_vec(new_regions)); - errorpb.set_stale_epoch(e); + Error::EpochNotMatch(_, new_regions) => { + let mut e = errorpb::EpochNotMatch::new(); + e.set_current_regions(RepeatedField::from_vec(new_regions)); + errorpb.set_epoch_not_match(e); } Error::StaleCommand => { errorpb.set_stale_command(errorpb::StaleCommand::new()); diff --git a/src/raftstore/store/fsm/apply.rs b/src/raftstore/store/fsm/apply.rs index 6fcc2d006ef..9a9d5edb4e5 100644 --- a/src/raftstore/store/fsm/apply.rs +++ b/src/raftstore/store/fsm/apply.rs @@ -883,7 +883,7 @@ impl ApplyDelegate { /// /// An apply operation can fail in the following situations: /// 1. it encounters an error that will occur on all stores, it can continue - /// applying next entry safely, like stale epoch for example; + /// applying next entry safely, like epoch not match for example; /// 2. it encounters an error that may not occur on all stores, in this case /// we should try to apply the entry again or panic. Considering that this /// usually due to disk operation fail, which is rare, so just panic is ok. @@ -905,7 +905,7 @@ impl ApplyDelegate { // clear dirty values. ctx.wb_mut().rollback_to_save_point().unwrap(); match e { - Error::StaleEpoch(..) => debug!("{} stale epoch err: {:?}", self.tag, e), + Error::EpochNotMatch(..) => debug!("{} epoch not match err: {:?}", self.tag, e), _ => error!("{} execute raft command err: {:?}", self.tag, e), } (cmd_resp::new_error(e), ApplyResult::None) @@ -986,7 +986,7 @@ impl ApplyDelegate { ctx: &mut ApplyContext, req: RaftCmdRequest, ) -> Result<(RaftCmdResponse, ApplyResult)> { - // Include region for stale epoch after merge may cause key not in range. + // Include region for epoch not match after merge may cause key not in range. let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_region_epoch(&req, &self.region, include_region)?; @@ -1995,7 +1995,7 @@ fn check_sst_for_ingestion(sst: &SSTMeta, region: &Region) -> Result<()> { || epoch.get_version() != region_epoch.get_version() { let error = format!("{:?} != {:?}", epoch, region_epoch); - return Err(Error::StaleEpoch(error, vec![region.clone()])); + return Err(Error::EpochNotMatch(error, vec![region.clone()])); } let range = sst.get_range(); @@ -3180,7 +3180,7 @@ mod tests { .build(); router.schedule_task(1, Msg::apply(Apply::new(1, 2, vec![put_entry]))); let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); - assert!(resp.get_header().get_error().has_stale_epoch()); + assert!(resp.get_header().get_error().has_epoch_not_match()); let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.applied_index_term, 2); assert_eq!(apply_res.apply_state.get_applied_index(), 3); @@ -3290,12 +3290,12 @@ mod tests { .ingest_sst(&meta1) .epoch(0, 3) .build(); - let ingest_stale_epoch = EntryBuilder::new(11, 3) + let ingest_epoch_not_match = EntryBuilder::new(11, 3) .capture_resp(&router, 3, 1, capture_tx.clone()) .ingest_sst(&meta2) .epoch(0, 3) .build(); - let entries = vec![put_ok, ingest_ok, ingest_stale_epoch]; + let entries = vec![put_ok, ingest_ok, ingest_epoch_not_match]; router.schedule_task(1, Msg::apply(Apply::new(1, 3, entries))); let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); @@ -3528,9 +3528,6 @@ mod tests { let resp = exec_split(&router, splits.clone()); // All requests should be checked. assert!(error_msg(&resp).contains("id count"), "{:?}", resp); - - let mut new_version = epoch.borrow().get_version() + 1; - epoch.borrow_mut().set_version(new_version); let checker = SplitResultChecker { db: &engines.kv, origin_peers: &peers, @@ -3544,11 +3541,11 @@ mod tests { let resp = exec_split(&router, splits.clone()); // Split should succeed. assert!(!resp.get_header().has_error(), "{:?}", resp); + let mut new_version = epoch.borrow().get_version() + 1; + epoch.borrow_mut().set_version(new_version); checker.check(b"", b"k1", 8, &[9, 10, 11], true); checker.check(b"k1", b"k5", 1, &[3, 5, 7], false); - new_version = epoch.borrow().get_version() + 1; - epoch.borrow_mut().set_version(new_version); splits.mut_requests().clear(); splits .mut_requests() @@ -3557,11 +3554,11 @@ mod tests { let resp = exec_split(&router, splits.clone()); // Right derive should be respected. assert!(!resp.get_header().has_error(), "{:?}", resp); + new_version = epoch.borrow().get_version() + 1; + epoch.borrow_mut().set_version(new_version); checker.check(b"k4", b"k5", 12, &[13, 14, 15], true); checker.check(b"k1", b"k4", 1, &[3, 5, 7], false); - new_version = epoch.borrow().get_version() + 2; - epoch.borrow_mut().set_version(new_version); splits.mut_requests().clear(); splits .mut_requests() @@ -3573,12 +3570,12 @@ mod tests { let resp = exec_split(&router, splits.clone()); // Right derive should be respected. assert!(!resp.get_header().has_error(), "{:?}", resp); + new_version = epoch.borrow().get_version() + 2; + epoch.borrow_mut().set_version(new_version); checker.check(b"k1", b"k2", 16, &[17, 18, 19], true); checker.check(b"k2", b"k3", 20, &[21, 22, 23], true); checker.check(b"k3", b"k4", 1, &[3, 5, 7], false); - new_version = epoch.borrow().get_version() + 2; - epoch.borrow_mut().set_version(new_version); splits.mut_requests().clear(); splits .mut_requests() @@ -3590,6 +3587,8 @@ mod tests { let resp = exec_split(&router, splits.clone()); // Right derive should be respected. assert!(!resp.get_header().has_error(), "{:?}", resp); + new_version = epoch.borrow().get_version() + 2; + epoch.borrow_mut().set_version(new_version); checker.check(b"k3", b"k31", 1, &[3, 5, 7], false); checker.check(b"k31", b"k32", 24, &[25, 26, 27], true); checker.check(b"k32", b"k4", 28, &[29, 30, 31], true); diff --git a/src/raftstore/store/fsm/peer.rs b/src/raftstore/store/fsm/peer.rs index 76f5bddc15e..b7e5494cf01 100644 --- a/src/raftstore/store/fsm/peer.rs +++ b/src/raftstore/store/fsm/peer.rs @@ -1927,7 +1927,7 @@ impl<'a, T: Transport, C: PdClient> PeerFsmDelegate<'a, T, C> { } match util::check_region_epoch(msg, self.fsm.peer.region(), true) { - Err(Error::StaleEpoch(msg, mut new_regions)) => { + Err(Error::EpochNotMatch(msg, mut new_regions)) => { // Attach the region which might be split from the current region. But it doesn't // matter if the region is not split from the current region. If the region meta // received by the TiKV driver is newer than the meta cached in the driver, the meta is @@ -1936,8 +1936,8 @@ impl<'a, T: Transport, C: PdClient> PeerFsmDelegate<'a, T, C> { if let Some(sibling_region) = sibling_region { new_regions.push(sibling_region); } - self.ctx.raft_metrics.invalid_proposal.stale_epoch += 1; - Err(Error::StaleEpoch(msg, new_regions)) + self.ctx.raft_metrics.invalid_proposal.epoch_not_match += 1; + Err(Error::EpochNotMatch(msg, new_regions)) } Err(e) => Err(e), Ok(()) => Ok(None), @@ -2230,7 +2230,7 @@ impl<'a, T: Transport, C: PdClient> PeerFsmDelegate<'a, T, C> { region.get_region_epoch(), epoch ); - return Err(Error::StaleEpoch( + return Err(Error::EpochNotMatch( format!( "{} epoch changed {:?} != {:?}, retry later", self.fsm.peer.tag, latest_epoch, epoch diff --git a/src/raftstore/store/local_metrics.rs b/src/raftstore/store/local_metrics.rs index 1722213ce68..ae62c0a1fee 100644 --- a/src/raftstore/store/local_metrics.rs +++ b/src/raftstore/store/local_metrics.rs @@ -312,7 +312,7 @@ pub struct RaftInvalidProposeMetrics { pub not_leader: u64, pub mismatch_peer_id: u64, pub stale_command: u64, - pub stale_epoch: u64, + pub epoch_not_match: u64, } impl Default for RaftInvalidProposeMetrics { @@ -323,7 +323,7 @@ impl Default for RaftInvalidProposeMetrics { not_leader: 0, mismatch_peer_id: 0, stale_command: 0, - stale_epoch: 0, + epoch_not_match: 0, } } } @@ -360,11 +360,11 @@ impl RaftInvalidProposeMetrics { .inc_by(self.stale_command as i64); self.stale_command = 0; } - if self.stale_epoch > 0 { + if self.epoch_not_match > 0 { RAFT_INVALID_PROPOSAL_COUNTER_VEC - .with_label_values(&["stale_epoch"]) - .inc_by(self.stale_epoch as i64); - self.stale_epoch = 0; + .with_label_values(&["epoch_not_match"]) + .inc_by(self.epoch_not_match as i64); + self.epoch_not_match = 0; } } } diff --git a/src/raftstore/store/peer.rs b/src/raftstore/store/peer.rs index a79243ed20e..22f464e2df3 100644 --- a/src/raftstore/store/peer.rs +++ b/src/raftstore/store/peer.rs @@ -2108,7 +2108,7 @@ impl ReadExecutor { pub fn execute(&mut self, msg: &RaftCmdRequest, region: &metapb::Region) -> ReadResponse { if self.check_epoch { if let Err(e) = check_region_epoch(msg, region, true) { - debug!("[region {}] stale epoch err: {:?}", region.get_id(), e); + debug!("[region {}] epoch not match err: {:?}", region.get_id(), e); return ReadResponse { response: cmd_resp::new_error(e), snapshot: None, diff --git a/src/raftstore/store/util.rs b/src/raftstore/store/util.rs index 8cad19d53b0..eeee0c8f802 100644 --- a/src/raftstore/store/util.rs +++ b/src/raftstore/store/util.rs @@ -278,29 +278,37 @@ pub fn check_region_epoch( } let from_epoch = req.get_header().get_region_epoch(); - let latest_epoch = region.get_region_epoch(); + let current_epoch = region.get_region_epoch(); - // should we use not equal here? - if (check_conf_ver && from_epoch.get_conf_ver() < latest_epoch.get_conf_ver()) - || (check_ver && from_epoch.get_version() < latest_epoch.get_version()) + // We must check epochs strictly to avoid key not in region error. + // + // A 3 nodes TiKV cluster with merge enabled, after commit merge, TiKV A + // tells TiDB with a epoch not match error contains the latest target Region + // info, TiDB updates its region cache and sends requests to TiKV B, + // and TiKV B has not applied commit merge yet, since the region epoch in + // request is higher than TiKV B, the request must be denied due to epoch + // not match, so it does not read on a stale snapshot, thus avoid the + // KeyNotInRegion error. + if (check_conf_ver && from_epoch.get_conf_ver() != current_epoch.get_conf_ver()) + || (check_ver && from_epoch.get_version() != current_epoch.get_version()) { debug!( - "[region {}] received stale epoch {:?}, mine: {:?}", - region.get_id(), - from_epoch, - latest_epoch + "epoch not match"; + "region_id" => region.get_id(), + "from_epoch" => ?from_epoch, + "current_epoch" => ?current_epoch, ); let regions = if include_region { vec![region.to_owned()] } else { vec![] }; - return Err(Error::StaleEpoch( + return Err(Error::EpochNotMatch( format!( - "latest_epoch of region {} is {:?}, but you \ + "current epoch of region {} is {:?}, but you \ sent {:?}", region.get_id(), - latest_epoch, + current_epoch, from_epoch ), regions, @@ -1694,8 +1702,14 @@ mod tests { req.mut_header() .set_region_epoch(stale_version_epoch.clone()); check_region_epoch(&req, &stale_region, false).unwrap(); - check_region_epoch(&req, ®ion, false).unwrap_err(); - check_region_epoch(&req, ®ion, true).unwrap_err(); + + let mut latest_version_epoch = epoch.clone(); + latest_version_epoch.set_version(3); + for epoch in &[stale_version_epoch, latest_version_epoch] { + req.mut_header().set_region_epoch(epoch.clone()); + check_region_epoch(&req, ®ion, false).unwrap_err(); + check_region_epoch(&req, ®ion, true).unwrap_err(); + } } // These admin commands requires epoch.conf_version. @@ -1722,8 +1736,14 @@ mod tests { stale_region.set_region_epoch(stale_conf_epoch.clone()); req.mut_header().set_region_epoch(stale_conf_epoch.clone()); check_region_epoch(&req, &stale_region, false).unwrap(); - check_region_epoch(&req, ®ion, false).unwrap_err(); - check_region_epoch(&req, ®ion, true).unwrap_err(); + + let mut latest_conf_epoch = epoch.clone(); + latest_conf_epoch.set_conf_ver(3); + for epoch in &[stale_conf_epoch, latest_conf_epoch] { + req.mut_header().set_region_epoch(epoch.clone()); + check_region_epoch(&req, ®ion, false).unwrap_err(); + check_region_epoch(&req, ®ion, true).unwrap_err(); + } } } diff --git a/src/raftstore/store/worker/read.rs b/src/raftstore/store/worker/read.rs index 924ed89a513..b0aa2b268b6 100644 --- a/src/raftstore/store/worker/read.rs +++ b/src/raftstore/store/worker/read.rs @@ -352,7 +352,7 @@ impl> LocalReader { if util::check_region_epoch(req, &delegate.region, false).is_err() { self.metrics.borrow_mut().rejected_by_epoch += 1; // Stale epoch, redirect it to raftstore to get the latest region. - debug!("rejected by stale epoch"; "tag" => &delegate.tag); + debug!("rejected by epoch not match"; "tag" => &delegate.tag); return Ok(None); } diff --git a/src/storage/engine/metrics.rs b/src/storage/engine/metrics.rs index 77e8ec4ae83..a199e2850a6 100644 --- a/src/storage/engine/metrics.rs +++ b/src/storage/engine/metrics.rs @@ -31,7 +31,7 @@ make_static_metric! { err_not_leader, err_region_not_found, err_key_not_in_region, - err_stale_epoch, + err_epoch_not_match, err_server_is_busy, err_stale_command, err_store_not_match, @@ -59,7 +59,7 @@ impl From for RequestStatusKind { ErrorHeaderKind::NotLeader => RequestStatusKind::err_not_leader, ErrorHeaderKind::RegionNotFound => RequestStatusKind::err_region_not_found, ErrorHeaderKind::KeyNotInRegion => RequestStatusKind::err_key_not_in_region, - ErrorHeaderKind::StaleEpoch => RequestStatusKind::err_stale_epoch, + ErrorHeaderKind::EpochNotMatch => RequestStatusKind::err_epoch_not_match, ErrorHeaderKind::ServerIsBusy => RequestStatusKind::err_server_is_busy, ErrorHeaderKind::StaleCommand => RequestStatusKind::err_stale_command, ErrorHeaderKind::StoreNotMatch => RequestStatusKind::err_store_not_match, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a85ab1e842b..4a72f1eb58b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1703,7 +1703,7 @@ pub enum ErrorHeaderKind { NotLeader, RegionNotFound, KeyNotInRegion, - StaleEpoch, + EpochNotMatch, ServerIsBusy, StaleCommand, StoreNotMatch, @@ -1719,7 +1719,7 @@ impl ErrorHeaderKind { ErrorHeaderKind::NotLeader => "not_leader", ErrorHeaderKind::RegionNotFound => "region_not_found", ErrorHeaderKind::KeyNotInRegion => "key_not_in_region", - ErrorHeaderKind::StaleEpoch => "stale_epoch", + ErrorHeaderKind::EpochNotMatch => "epoch_not_match", ErrorHeaderKind::ServerIsBusy => "server_is_busy", ErrorHeaderKind::StaleCommand => "stale_command", ErrorHeaderKind::StoreNotMatch => "store_not_match", @@ -1742,8 +1742,8 @@ pub fn get_error_kind_from_header(header: &errorpb::Error) -> ErrorHeaderKind { ErrorHeaderKind::RegionNotFound } else if header.has_key_not_in_region() { ErrorHeaderKind::KeyNotInRegion - } else if header.has_stale_epoch() { - ErrorHeaderKind::StaleEpoch + } else if header.has_epoch_not_match() { + ErrorHeaderKind::EpochNotMatch } else if header.has_server_is_busy() { ErrorHeaderKind::ServerIsBusy } else if header.has_stale_command() { diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index bd57cf5f20d..f996eb4ecae 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -59,7 +59,7 @@ fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { cluster.assert_quorum(|engine| engine.get_value(&keys::data_key(key)).unwrap().is_none()); - // TODO add stale epoch test cases. + // TODO add epoch not match test cases. } fn test_multi_leader_crash(cluster: &mut Cluster) { diff --git a/tests/integrations/raftstore/test_single.rs b/tests/integrations/raftstore/test_single.rs index 9e345187f87..947334f2002 100644 --- a/tests/integrations/raftstore/test_single.rs +++ b/tests/integrations/raftstore/test_single.rs @@ -16,7 +16,7 @@ use std::time::Duration; use test_raftstore::*; use tikv::util::config::*; -// TODO add stale epoch test cases. +// TODO add epoch not match test cases. fn test_put(cluster: &mut Cluster) { cluster.run(); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 17d2dbf3350..c65cac52e78 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -601,13 +601,13 @@ fn test_node_split_region_diff_check() { test_split_region_diff_check(&mut cluster); } -fn test_split_stale_epoch(cluster: &mut Cluster, right_derive: bool) { +fn test_split_epoch_not_match(cluster: &mut Cluster, right_derive: bool) { cluster.cfg.raft_store.right_derive_when_split = right_derive; cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); let old = pd_client.get_region(b"k1").unwrap(); // Construct a get command using old region meta. - let get = new_request( + let get_old = new_request( old.get_id(), old.get_region_epoch().clone(), vec![new_get_cmd(b"k1")], @@ -617,52 +617,70 @@ fn test_split_stale_epoch(cluster: &mut Cluster, right_derive: let left = pd_client.get_region(b"k1").unwrap(); let right = pd_client.get_region(b"k3").unwrap(); - let resp = cluster - .call_command_on_leader(get, Duration::from_secs(5)) - .unwrap(); - assert!(resp.get_header().has_error()); - assert!(resp.get_header().get_error().has_stale_epoch()); - if right_derive { - assert_eq!( - resp.get_header() - .get_error() - .get_stale_epoch() - .get_new_regions(), - &[right, left] - ); + let new = if right_derive { + right.clone() } else { - assert_eq!( - resp.get_header() - .get_error() - .get_stale_epoch() - .get_new_regions(), - &[left, right] + left.clone() + }; + + // Newer epoch also triggers the EpochNotMatch error. + let mut latest_epoch = new.get_region_epoch().clone(); + let latest_version = latest_epoch.get_version() + 1; + latest_epoch.set_version(latest_version); + + let get_new = new_request(new.get_id(), latest_epoch, vec![new_get_cmd(b"k1")], false); + for get in &[get_old, get_new] { + let resp = cluster + .call_command_on_leader(get.clone(), Duration::from_secs(5)) + .unwrap(); + assert!(resp.get_header().has_error(), "{:?}", get); + assert!( + resp.get_header().get_error().has_epoch_not_match(), + "{:?}", + get ); + if right_derive { + assert_eq!( + resp.get_header() + .get_error() + .get_epoch_not_match() + .get_current_regions(), + &[right.clone(), left.clone()] + ); + } else { + assert_eq!( + resp.get_header() + .get_error() + .get_epoch_not_match() + .get_current_regions(), + &[left.clone(), right.clone()] + ); + } } } #[test] -fn test_server_split_stale_epoch_left_derive() { +fn test_server_split_epoch_not_match_left_derive() { let mut cluster = new_server_cluster(0, 3); - test_split_stale_epoch(&mut cluster, false); + test_split_epoch_not_match(&mut cluster, false); } #[test] -fn test_server_split_stale_epoch_right_derive() { +fn test_server_split_epoch_not_match_right_derive() { let mut cluster = new_server_cluster(0, 3); - test_split_stale_epoch(&mut cluster, true); + test_split_epoch_not_match(&mut cluster, true); } #[test] -fn test_node_split_stale_epoch_left_derive() { +fn test_node_split_epoch_not_match_left_derive() { let mut cluster = new_node_cluster(0, 3); - test_split_stale_epoch(&mut cluster, false); + test_split_epoch_not_match(&mut cluster, false); } #[test] -fn test_node_split_stale_epoch_right_derive() { +fn test_node_split_epoch_not_match_right_derive() { let mut cluster = new_node_cluster(0, 3); - test_split_stale_epoch(&mut cluster, true); + test_split_epoch_not_match(&mut cluster, true); } // For the peer which is the leader of the region before split, @@ -800,7 +818,7 @@ fn test_node_split_update_region_right_derive() { } #[test] -fn test_split_with_stale_epoch() { +fn test_split_with_epoch_not_match() { let mut cluster = new_node_cluster(0, 3); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -828,5 +846,5 @@ fn test_split_with_stale_epoch() { let resp = cluster .call_command_on_leader(req, Duration::from_secs(3)) .unwrap(); - assert!(resp.get_header().get_error().has_stale_epoch()); + assert!(resp.get_header().get_error().has_epoch_not_match()); }