diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index d684243a6..7e6dc84ab 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -321,6 +321,9 @@ jobs:
- toolchain: "nightly"
features: "single-term-leader"
+ - toolchain: "nightly"
+ features: "loosen-follower-log-revert"
+
steps:
- name: Setup | Checkout
diff --git a/openraft/Cargo.toml b/openraft/Cargo.toml
index 60f5d3b18..e91f17b69 100644
--- a/openraft/Cargo.toml
+++ b/openraft/Cargo.toml
@@ -87,6 +87,12 @@ storage-v2 = []
# Disallows applications to share a raft instance with multiple threads.
singlethreaded = ["macros/singlethreaded"]
+
+# Permit the follower's log to roll back to an earlier state without causing the leader to panic.
+# Although log state reversion is typically seen as a bug, enabling it can be useful for testing or other special scenarios.
+# For instance, in a cluster with an even number of nodes, erasing a node's data and then rebooting it (the log reverts to empty) will not result in data loss.
+loosen-follower-log-revert = []
+
# default = ["single-term-leader"]
[package.metadata.docs.rs]
diff --git a/openraft/src/docs/faq/faq.md b/openraft/src/docs/faq/faq.md
index 4cb011675..ee62e2297 100644
--- a/openraft/src/docs/faq/faq.md
+++ b/openraft/src/docs/faq/faq.md
@@ -1,8 +1,52 @@
# FAQ
-- Q: 🤔 Why is log id `(term, node_id, log_index)`, while standard Raft uses just
- `(term, log_index)`?
+- **🤔 Why is log id a tuple of `(term, node_id, log_index)`, while standard Raft uses just
+ `(term, log_index)`**?
- A: The log id `(term, node_id, log_index)` is used to minimize the chance of election conflicts.
+ 💡 The log id `(term, node_id, log_index)` is used to minimize the chance of election conflicts.
This way in every term there could be more than one leaders elected, and the last one is valid.
See: [`leader-id`](`crate::docs::data::leader_id`) for details.
+
+
+
+- **🤔 How to remove node-2 safely from a cluster `{1, 2, 3}`**?
+
+ 💡 Call `Raft::change_membership(btreeset!{1, 3})` to exclude node-2 from
+ the cluster. Then wipe out node-2 data.
+ **NEVER** modify/erase the data of any node that is still in a raft cluster, unless you know what you are doing.
+
+
+
+- **🤔 Can I wipe out the data of *one* node and wait for the leader to replicate all data to it again**?
+
+  💡 Avoid doing this. Doing so will cause the leader to panic. But it is permitted
+  if the [`loosen-follower-log-revert`] feature flag is enabled.
+
+  In a raft cluster, although logs are replicated to multiple nodes,
+  wiping out a node and restarting it can still cause data loss.
+  Assume the leader is `N1`, and the followers are `N2, N3, N4, N5`:
+  - A log(`a`) that is replicated by `N1` to `N2, N3` is considered committed.
+  - At this point, if `N3` is replaced with an empty node and the leader `N1` crashes at the same time, then `N5` may be elected as the new leader with votes granted by `N3, N4`;
+  - Then the new leader `N5` will not have log `a`.
+
+ ```text
+ Ni: Node i
+ Lj: Leader at term j
+ Fj: Follower at term j
+
+ N1 | L1 a crashed
+ N2 | F1 a
+ N3 | F1 a erased F2
+ N4 | F2
+ N5 | elect L2
+ ----------------------------+---------------> time
+ Data loss: N5 does not have log `a`
+ ```
+
+  But for a cluster with an even number of nodes, erasing **exactly one** node won't cause data loss.
+  Thus, in a special scenario like this, or for testing purposes, you can use
+  `--features loosen-follower-log-revert` to permit erasing a node.
+
+
+
+[`loosen-follower-log-revert`]: `crate::docs::feature_flags#loosen_follower_log_revert`
diff --git a/openraft/src/docs/feature_flags/feature-flags.md b/openraft/src/docs/feature_flags/feature-flags.md
index adb608710..4c2f0b08c 100644
--- a/openraft/src/docs/feature_flags/feature-flags.md
+++ b/openraft/src/docs/feature_flags/feature-flags.md
@@ -2,15 +2,27 @@
By default openraft enables no features.
-- `bt`: attaches backtrace to generated errors.
+- `bt`:
+ attaches backtrace to generated errors.
+
+
+- `loosen-follower-log-revert`:
+  Permit the follower's log to roll back to an earlier state without causing the leader to panic.
+  Although log state reversion is typically seen as a bug, enabling it can be useful for testing or other special scenarios.
+  For instance, in a cluster with an even number of nodes, erasing a node's data and then rebooting it (the log reverts to empty) will not result in data loss.
+
+ **Do not use it unless you know what you are doing**.
+
- `serde`: derives `serde::Serialize, serde::Deserialize` for type that are used
in storage and network, such as `Vote` or `AppendEntriesRequest`.
+
- `single-term-leader`: allows only one leader to be elected in each `term`.
This is the standard raft policy, which increases election confliction rate
but reduce `LogId`(`(term, node_id, index)` to `(term, index)`) size.
Read more about how it is implemented in [`vote`](./vote.md)
+
- `compat-07`: provides additional data types to build v0.7 compatible RaftStorage.
@@ -18,13 +30,16 @@ By default openraft enables no features.
compat-07 = ["compat", "single-term-leader", "serde", "dep:or07", "compat-07-testing"]
compat-07-testing = ["dep:tempdir", "anyhow", "dep:serde_json"]
```
+
- `storage-v2`: enables `RaftLogStorage` and `RaftStateMachine` as the v2 storage
This is a temporary feature flag, and will be removed in the future, when v2 storage is stable.
This feature disables `Adapter`, which is for v1 storage to be used as v2.
- V2 storage separates log store and state machine store so that log IO and state machine IO can be parallelized naturally.
+ V2 storage separates log store and state machine store so that log IO and state machine IO can be parallelized naturally.
+
- `singlethreaded`: removes `Send` bounds from `AppData`, `AppDataResponse`, `RaftEntry`, and `SnapshotData` to force the
asynchronous runtime to spawn any tasks in the current thread.
This is for any single-threaded application that never allows a raft instance to be shared among multiple threads.
- In order to use the feature, `AsyncRuntime::spawn` should invoke `tokio::task::spawn_local` or equivalents.
\ No newline at end of file
+ In order to use the feature, `AsyncRuntime::spawn` should invoke `tokio::task::spawn_local` or equivalents.
+
diff --git a/openraft/src/progress/entry/mod.rs b/openraft/src/progress/entry/mod.rs
index 0dba3c253..14b981185 100644
--- a/openraft/src/progress/entry/mod.rs
+++ b/openraft/src/progress/entry/mod.rs
@@ -5,6 +5,7 @@ use std::fmt::Display;
use std::fmt::Formatter;
use std::ops::Deref;
+use crate::display_ext::DisplayOptionExt;
use crate::less;
use crate::less_equal;
use crate::progress::inflight::Inflight;
@@ -124,6 +125,27 @@ impl ProgressEntry {
debug_assert!(conflict < self.searching_end);
self.searching_end = conflict;
+ // An already matched log id is found to be lost:
+ //
+ // - If log reversion is allowed, just restart the binary search from the beginning.
+ // - Otherwise, panic.
+ //
+ // Refer to: `docs::feature_flags#loosen_follower_log_revert`
+ {
+ #[cfg(feature = "loosen-follower-log-revert")]
+ if conflict < self.matching.next_index() {
+ self.matching = None;
+ }
+
+ debug_assert!(
+ conflict >= self.matching.next_index(),
+ "follower log reversion is not allowed \
+ without `--features loosen-follower-log-revert`; \
+ matching: {}; conflict: {}",
+ self.matching.display(),
+ conflict
+ );
+ }
Ok(())
}
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index f053bd110..1c421fb81 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -39,3 +39,4 @@ tracing-subscriber = { workspace = true }
bt = ["openraft/bt"]
single-term-leader = ["openraft/single-term-leader"]
+loosen-follower-log-revert = ["openraft/loosen-follower-log-revert"]
diff --git a/tests/tests/README.md b/tests/tests/README.md
index af6434c04..a908d311c 100644
--- a/tests/tests/README.md
+++ b/tests/tests/README.md
@@ -11,7 +11,7 @@ integration tests to a separate crate.
A test file name starts with `t[\d\d]_`, where `\d\d` is the test case number indicating priority.
-- `t00`: no used.
+- `t00`: not used.
- `t10`: basic behaviors.
- `t20`: life cycle test cases.
- `t30`: special cases for an API.
diff --git a/tests/tests/replication/main.rs b/tests/tests/replication/main.rs
index bfc7fd20e..72c1c97b5 100644
--- a/tests/tests/replication/main.rs
+++ b/tests/tests/replication/main.rs
@@ -8,3 +8,5 @@ mod fixtures;
mod t10_append_entries_partial_success;
mod t50_append_entries_backoff;
mod t50_append_entries_backoff_rejoin;
+#[cfg(feature = "loosen-follower-log-revert")]
+mod t60_feature_loosen_follower_log_revert;
diff --git a/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs b/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs
new file mode 100644
index 000000000..e5d7e01ff
--- /dev/null
+++ b/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs
@@ -0,0 +1,63 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::Result;
+use maplit::btreeset;
+use openraft::Config;
+use openraft_memstore::MemStore;
+
+use crate::fixtures::init_default_ut_tracing;
+use crate::fixtures::RaftRouter;
+
+/// With "--features loosen-follower-log-revert", the leader allows a follower to revert its log
+/// to an earlier state.
+#[async_entry::test(worker_threads = 4, init = "init_default_ut_tracing()", tracing_span = "debug")]
+async fn feature_loosen_follower_log_revert() -> Result<()> {
+ let config = Arc::new(
+ Config {
+ enable_tick: false,
+ enable_heartbeat: false,
+ ..Default::default()
+ }
+ .validate()?,
+ );
+
+ let mut router = RaftRouter::new(config.clone());
+
+ tracing::info!("--- initializing cluster");
+ let mut log_index = router.new_cluster(btreeset! {0,1,2}, btreeset! {3}).await?;
+
+ tracing::info!(log_index, "--- write 10 logs");
+ {
+ log_index += router.client_request_many(0, "0", 10).await?;
+ for i in [0, 1, 2, 3] {
+ router.wait(&i, timeout()).log(Some(log_index), format!("{} writes", 10)).await?;
+ }
+ }
+
+ tracing::info!(log_index, "--- erase node 3 and restart");
+ {
+ let (_raft, ls, sm) = router.remove_node(3).unwrap();
+ {
+ let mut sto = ls.storage_mut().await;
+ *sto = Arc::new(MemStore::new());
+ }
+ router.new_raft_node_with_sto(3, ls, sm).await;
+ router.add_learner(0, 3).await?;
+ log_index += 1; // add learner
+ }
+
+ tracing::info!(log_index, "--- write another 10 logs, leader should not panic");
+ {
+ log_index += router.client_request_many(0, "0", 10).await?;
+ for i in [0, 1, 2, 3] {
+ router.wait(&i, timeout()).log(Some(log_index), format!("{} writes", 10)).await?;
+ }
+ }
+
+ Ok(())
+}
+
+fn timeout() -> Option<Duration> {
+ Some(Duration::from_millis(1_000))
+}