From ea771caa4f8ef04ea2bee95f7b8f46e7d1625437 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 16:22:26 +0100 Subject: [PATCH 01/38] added heuristic for block rejections timeout --- .../src/nakamoto_node/signer_coordinator.rs | 43 +++++++++++++----- .../src/nakamoto_node/stackerdb_listener.rs | 45 +++++++++---------- testnet/stacks-node/src/tests/signer/v0.rs | 45 +++++++++++++++++++ 3 files changed, 98 insertions(+), 35 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f3df78c66b..d409e73445 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -16,6 +16,7 @@ use std::sync::atomic::AtomicBool; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; +use std::time::Instant; use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; use libsigner::{BlockProposal, SignerSession, StackerDBSession}; @@ -37,7 +38,9 @@ use stacks::util_lib::boot::boot_code_id; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::stackerdb_listener::{StackerDBListener, EVENT_RECEIVER_POLL}; +use crate::nakamoto_node::stackerdb_listener::{ + BlockStatus, StackerDBListener, EVENT_RECEIVER_POLL, +}; use crate::neon::Counters; use crate::Config; @@ -270,17 +273,19 @@ impl SignerCoordinator { burn_tip: &BlockSnapshot, counters: &Counters, ) -> Result, NakamotoNodeError> { + let mut rejections_timer = Instant::now(); + let mut rejections: u32 = 0; + let mut rejections_timeout = core::time::Duration::from_secs(600); + let mut block_status_tracker = BlockStatus::default(); loop { + /// + /// TODO: describe the logic let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, + &mut block_status_tracker, + rejections_timer, + rejections_timeout, EVENT_RECEIVER_POLL, - |status| { - status.total_weight_signed < self.weight_threshold - && status - .total_reject_weight - .saturating_add(self.weight_threshold) - <= self.total_weight - }, )? { Some(status) => status, None => { @@ -313,10 +318,26 @@ impl SignerCoordinator { return Err(NakamotoNodeError::BurnchainTipChanged); } + if rejections_timer.elapsed() > rejections_timeout { + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Gave up while tried reaching the threshold".into(), + )); + } + continue; } }; + if rejections != block_status.total_reject_weight { + rejections_timer = Instant::now(); + rejections = block_status.total_reject_weight; + rejections_timeout = core::time::Duration::from_secs_f32( + 600 as f32 + - (600 as f32 + * ((rejections as f32 / self.weight_threshold as f32).powf(2.0))), + ); + } + if block_status .total_reject_weight .saturating_add(self.weight_threshold) @@ -334,10 +355,12 @@ impl SignerCoordinator { "block_signer_sighash" => %block_signer_sighash, ); return Ok(block_status.gathered_signatures.values().cloned().collect()); - } else { + } else if rejections_timer.elapsed() > rejections_timeout { return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Unblocked without reaching the threshold".into(), + "Gave up while tried reaching the threshold".into(), )); + } else { + continue; } } } diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 834c59fa95..92688c0075 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -50,7 +50,7 @@ pub static TEST_IGNORE_SIGNERS: LazyLock> = LazyLock::new(TestFla /// waking up to check timeouts? pub static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default, PartialEq)] pub struct BlockStatus { pub responded_signers: HashSet, pub gathered_signatures: BTreeMap, @@ -337,10 +337,8 @@ impl StackerDBListener { block.gathered_signatures.insert(slot_id, signature); block.responded_signers.insert(signer_pubkey); - if block.total_weight_signed >= self.weight_threshold { - // Signal to anyone waiting on this block that we have enough signatures - cvar.notify_all(); - } + // Signal to anyone waiting on this block that we have a new status + cvar.notify_all(); // Update the idle timestamp for this signer self.update_idle_timestamp( @@ -378,6 +376,7 @@ impl StackerDBListener { } }; block.responded_signers.insert(rejected_pubkey); + block.total_reject_weight = block .total_reject_weight .checked_add(signer_entry.weight) @@ -396,14 +395,8 @@ impl StackerDBListener { "server_version" => rejected_data.metadata.server_version, ); - if block - .total_reject_weight - .saturating_add(self.weight_threshold) - > self.total_weight - { - // Signal to anyone waiting on this block that we have enough rejections - cvar.notify_all(); - } + // Signal to anyone waiting on this block that we have a new status + cvar.notify_all(); // Update the idle timestamp for this signer self.update_idle_timestamp( @@ -487,30 +480,32 @@ impl StackerDBListenerComms { /// Get the status for `block` from the Stacker DB listener. /// If the block is not found in the map, return an error. - /// If the block is found, call `condition` to check if the block status - /// satisfies the condition. - /// If the condition is satisfied, return the block status as - /// `Ok(Some(status))`. - /// If the condition is not satisfied, wait for it to be satisfied. + /// If the block is found, return it. /// If the timeout is reached, return `Ok(None)`. - pub fn wait_for_block_status( + pub fn wait_for_block_status( &self, block_signer_sighash: &Sha512Trunc256Sum, + block_status_tracker: &mut BlockStatus, + rejections_timer: std::time::Instant, + rejections_timeout: Duration, timeout: Duration, - condition: F, - ) -> Result, NakamotoNodeError> - where - F: Fn(&BlockStatus) -> bool, - { + ) -> Result, NakamotoNodeError> { let (lock, cvar) = &*self.blocks; let blocks = lock.lock().expect("FATAL: failed to lock block status"); let (guard, timeout_result) = cvar .wait_timeout_while(blocks, timeout, |map| { + if rejections_timer.elapsed() > rejections_timeout { + return true; + } let Some(status) = map.get(block_signer_sighash) else { return true; }; - condition(status) + if status != block_status_tracker { + *block_status_tracker = status.clone(); + return false; + } + return true; }) .expect("FATAL: failed to wait on block status cond var"); diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 190145279f..5b92bd47f7 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7733,6 +7733,51 @@ fn block_validation_response_timeout() { ); } +// Ensures that a signer that successfully submits a block to the node for validation +// will issue ConnectivityIssues rejections if a block submission times out. +// Also ensures that no other proposal gets submitted for validation if we +// are already waiting for a block submission response. +#[test] +#[ignore] +fn block_validation_check_rejection_timeout_heuristic() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + info!("------------------------- Test Setup -------------------------"); + let num_signers = 5; + let timeout = Duration::from_secs(30); + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + let send_amt = 100; + let send_fee = 180; + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + num_signers, + vec![(sender_addr, send_amt + send_fee)], + |config| { + config.block_proposal_validation_timeout = timeout; + }, + |_| {}, + None, + None, + ); + + let all_signers: Vec<_> = signer_test + .signer_stacks_private_keys + .iter() + .map(StacksPublicKey::from_private) + .collect(); + + signer_test.boot_to_epoch_3(); + + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[3], all_signers[4]]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![all_signers[0], all_signers[1], all_signers[2]]); + + info!("------------------------- Test Mine and Verify Confirmed Nakamoto Block -------------------------"); + signer_test.mine_and_verify_confirmed_naka_block(timeout, num_signers, true); +} + /// Test scenario: /// /// - when a signer submits a block validation request and From 6ab756b201a064fcbf3a0bb824a605187e34e894 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 16:33:14 +0100 Subject: [PATCH 02/38] added rejections_timeout test exposure via BLOCK_REJECTIONS_CURRENT_TIMEOUT --- .../src/nakamoto_node/signer_coordinator.rs | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 146e767b24..f018852e9e 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -44,6 +44,24 @@ use crate::nakamoto_node::stackerdb_listener::{ use crate::neon::Counters; use crate::Config; +#[cfg(test)] +use std::time::Duration; + +#[cfg(test)] +use stacks_common::util::tests::TestFlag; + +#[cfg(test)] +use std::sync::LazyLock; + +#[cfg(test)] +/// Test-only value for storing the current rejection based timeout +/// Used to test that the signers will broadcast a block if it gets enough signatures +pub static BLOCK_REJECTIONS_CURRENT_TIMEOUT: LazyLock> = + LazyLock::new(TestFlag::default); + +/// Base timeout for rejections heuristic +pub static BLOCK_REJECTIONS_TIMEOUT_BASE: u64 = 600; + /// The state of the signer database listener, used by the miner thread to /// interact with the signer listener. pub struct SignerCoordinator { @@ -298,11 +316,12 @@ impl SignerCoordinator { ) -> Result, NakamotoNodeError> { let mut rejections_timer = Instant::now(); let mut rejections: u32 = 0; - let mut rejections_timeout = core::time::Duration::from_secs(600); + let mut rejections_timeout = core::time::Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); let mut block_status_tracker = BlockStatus::default(); loop { /// /// TODO: describe the logic + /// let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, &mut block_status_tracker, @@ -355,10 +374,13 @@ impl SignerCoordinator { rejections_timer = Instant::now(); rejections = block_status.total_reject_weight; rejections_timeout = core::time::Duration::from_secs_f32( - 600 as f32 - - (600 as f32 + BLOCK_REJECTIONS_TIMEOUT_BASE as f32 + - (BLOCK_REJECTIONS_TIMEOUT_BASE as f32 * ((rejections as f32 / self.weight_threshold as f32).powf(2.0))), ); + + #[cfg(test)] + BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(rejections_timeout); } if block_status From bfd47939cce6cab5277c307497fc649e6ed47f53 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 16:55:05 +0100 Subject: [PATCH 03/38] improved test logic --- testnet/stacks-node/src/tests/signer/v0.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index c6b2367290..345e9d8e90 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -75,6 +75,7 @@ use crate::event_dispatcher::{MinedNakamotoBlockEvent, TEST_SKIP_BLOCK_ANNOUNCEM use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, }; +use crate::nakamoto_node::signer_coordinator::BLOCK_REJECTIONS_CURRENT_TIMEOUT; use crate::nakamoto_node::stackerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; use crate::run_loop::boot_nakamoto; @@ -7842,8 +7843,13 @@ fn block_validation_check_rejection_timeout_heuristic() { TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[3], all_signers[4]]); TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![all_signers[0], all_signers[1], all_signers[2]]); - info!("------------------------- Test Mine and Verify Confirmed Nakamoto Block -------------------------"); - signer_test.mine_and_verify_confirmed_naka_block(timeout, num_signers, true); + info!("------------------------- Test Mine and Verify Rejected Nakamoto Block -------------------------"); + signer_test.mine_nakamoto_block(timeout, true); + signer_test + .wait_for_block_rejections(timeout.as_secs(), &[all_signers[3], all_signers[4]]) + .unwrap(); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 400); } /// Test scenario: From 3cebb352e2ff379491b08e5b2077be0b4664da8c Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 17:57:24 +0100 Subject: [PATCH 04/38] fixed integration test --- testnet/stacks-node/src/tests/signer/v0.rs | 46 ++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 345e9d8e90..005c09c034 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7840,16 +7840,56 @@ fn block_validation_check_rejection_timeout_heuristic() { signer_test.boot_to_epoch_3(); + // note we just use mined nakamoto_blocks as the second block is not going to be confirmed + + info!("------------------------- Check Rejections-based timeout with 1 rejection -------------------------"); + + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[4]]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![ + all_signers[0], + all_signers[1], + all_signers[2], + all_signers[3], + ]); + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), + ) + .unwrap(); + + signer_test + .wait_for_block_rejections(timeout.as_secs(), &[all_signers[4]]) + .unwrap(); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 551); + + info!("------------------------- Check Rejections-based timeout with 2 rejections -------------------------"); + + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[3], all_signers[4]]); TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![all_signers[0], all_signers[1], all_signers[2]]); - info!("------------------------- Test Mine and Verify Rejected Nakamoto Block -------------------------"); - signer_test.mine_nakamoto_block(timeout, true); + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), + ) + .unwrap(); + signer_test .wait_for_block_rejections(timeout.as_secs(), &[all_signers[3], all_signers[4]]) .unwrap(); - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 400); + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 404); + + // reset reject/ignore + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![]); } /// Test scenario: From c3e163e6171934ec0519352d59189c861354957b Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 19:17:28 +0100 Subject: [PATCH 05/38] fixed comment for BLOCK_REJECTIONS_CURRENT_TIMEOUT --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f018852e9e..757d5943d3 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -55,7 +55,6 @@ use std::sync::LazyLock; #[cfg(test)] /// Test-only value for storing the current rejection based timeout -/// Used to test that the signers will broadcast a block if it gets enough signatures pub static BLOCK_REJECTIONS_CURRENT_TIMEOUT: LazyLock> = LazyLock::new(TestFlag::default); From 6b751c406843ed16709b93cf239fe1dd70f91a1d Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 19:19:35 +0100 Subject: [PATCH 06/38] fixed test comment --- testnet/stacks-node/src/tests/signer/v0.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 30b1ffbe52..ddde8bd5da 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7803,10 +7803,8 @@ fn block_validation_response_timeout() { ); } -// Ensures that a signer that successfully submits a block to the node for validation -// will issue ConnectivityIssues rejections if a block submission times out. -// Also ensures that no other proposal gets submitted for validation if we -// are already waiting for a block submission response. +// Verify that the miner timeout while waiting for signers will change accordingly +// to rejections. #[test] #[ignore] fn block_validation_check_rejection_timeout_heuristic() { From bb8df99742cf37723d3858bcc581d822f6c41b7a Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 19:23:19 +0100 Subject: [PATCH 07/38] fmt --- .../src/nakamoto_node/signer_coordinator.rs | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 757d5943d3..4aabe006e6 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -14,8 +14,12 @@ // along with this program. If not, see . use std::sync::atomic::AtomicBool; +#[cfg(test)] +use std::sync::LazyLock; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; +#[cfg(test)] +use std::time::Duration; use std::time::Instant; use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; @@ -34,6 +38,8 @@ use stacks::types::chainstate::{StacksBlockId, StacksPrivateKey}; use stacks::util::hash::Sha512Trunc256Sum; use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; +#[cfg(test)] +use stacks_common::util::tests::TestFlag; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; @@ -44,15 +50,6 @@ use crate::nakamoto_node::stackerdb_listener::{ use crate::neon::Counters; use crate::Config; -#[cfg(test)] -use std::time::Duration; - -#[cfg(test)] -use stacks_common::util::tests::TestFlag; - -#[cfg(test)] -use std::sync::LazyLock; - #[cfg(test)] /// Test-only value for storing the current rejection based timeout pub static BLOCK_REJECTIONS_CURRENT_TIMEOUT: LazyLock> = @@ -318,9 +315,9 @@ impl SignerCoordinator { let mut rejections_timeout = core::time::Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); let mut block_status_tracker = BlockStatus::default(); loop { - /// - /// TODO: describe the logic - /// + // At every iteration wait for the block_status. + // Exit when the amount of confirmations/rejections reach the threshold (or until timeout) + // Based on the amount of rejections, eventually modify the timeout. let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, &mut block_status_tracker, From 280483b7555ae482cfdec75293e0465a1ad5ad9e Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 22 Jan 2025 19:26:50 +0100 Subject: [PATCH 08/38] added more comments --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 4aabe006e6..f3044de972 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -310,13 +310,17 @@ impl SignerCoordinator { sortdb: &SortitionDB, counters: &Counters, ) -> Result, NakamotoNodeError> { + // this is used to track the start of the waiting cycle let mut rejections_timer = Instant::now(); + // the amount of current rejections to eventually modify the timeout let mut rejections: u32 = 0; + // default timeout let mut rejections_timeout = core::time::Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); + // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); loop { // At every iteration wait for the block_status. - // Exit when the amount of confirmations/rejections reach the threshold (or until timeout) + // Exit when the amount of confirmations/rejections reaches the threshold (or until timeout) // Based on the amount of rejections, eventually modify the timeout. let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, From 05b3f92c6c96ff57e4d05c5caded32885037dbf5 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 23 Jan 2025 15:51:08 +0100 Subject: [PATCH 09/38] fixed formatting --- .../stacks-node/src/nakamoto_node/signer_coordinator.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f3044de972..818f1f6a08 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -18,9 +18,7 @@ use std::sync::atomic::AtomicBool; use std::sync::LazyLock; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; -#[cfg(test)] -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; use libsigner::{BlockProposal, SignerSession, StackerDBSession}; @@ -315,7 +313,7 @@ impl SignerCoordinator { // the amount of current rejections to eventually modify the timeout let mut rejections: u32 = 0; // default timeout - let mut rejections_timeout = core::time::Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); + let mut rejections_timeout = Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); loop { @@ -373,7 +371,7 @@ impl SignerCoordinator { if rejections != block_status.total_reject_weight { rejections_timer = Instant::now(); rejections = block_status.total_reject_weight; - rejections_timeout = core::time::Duration::from_secs_f32( + rejections_timeout = Duration::from_secs_f32( BLOCK_REJECTIONS_TIMEOUT_BASE as f32 - (BLOCK_REJECTIONS_TIMEOUT_BASE as f32 * ((rejections as f32 / self.weight_threshold as f32).powf(2.0))), From 9971d1a40d5f1accb013565e545633ad5489f589 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 24 Jan 2025 08:17:33 +0100 Subject: [PATCH 10/38] added wanr! when timeout --- .../src/nakamoto_node/signer_coordinator.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 818f1f6a08..ce136a05a2 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -359,6 +359,12 @@ impl SignerCoordinator { } if rejections_timer.elapsed() > rejections_timeout { + warn!("Timed out while waiting for responses from signers"; + "elapsed" => rejections_timer.elapsed().as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections" => rejections, + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) + ); return Err(NakamotoNodeError::SigningCoordinatorFailure( "Gave up while tried reaching the threshold".into(), )); @@ -399,6 +405,12 @@ impl SignerCoordinator { ); return Ok(block_status.gathered_signatures.values().cloned().collect()); } else if rejections_timer.elapsed() > rejections_timeout { + warn!("Timed out while waiting for responses from signers"; + "elapsed" => rejections_timer.elapsed().as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections" => rejections, + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) + ); return Err(NakamotoNodeError::SigningCoordinatorFailure( "Gave up while tried reaching the threshold".into(), )); From 45ae0153843a91788e7990e164762d22fa163e7e Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Tue, 28 Jan 2025 18:12:15 +0000 Subject: [PATCH 11/38] initial prototype for configurable block_rejction timeout steps --- stackslib/src/config/mod.rs | 27 ++++++++++ .../src/nakamoto_node/signer_coordinator.rs | 50 ++++++++++++------- testnet/stacks-node/src/tests/signer/v0.rs | 6 ++- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 6d2d5e4389..b26676294f 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -36,6 +36,8 @@ use stacks_common::util::get_epoch_time_ms; use stacks_common::util::hash::hex_bytes; use stacks_common::util::secp256k1::{Secp256k1PrivateKey, Secp256k1PublicKey}; +use std::collections::BTreeMap; + use crate::burnchains::affirmation::AffirmationMap; use crate::burnchains::bitcoin::BitcoinNetworkType; use crate::burnchains::{Burnchain, MagicBytes, PoxConstants, BLOCKSTACK_MAGIC_MAINNET}; @@ -2156,6 +2158,8 @@ pub struct MinerConfig { pub tenure_extend_poll_secs: Duration, /// Duration to wait before attempting to issue a tenure extend pub tenure_timeout: Duration, + /// + pub block_rejection_timeout_steps: BTreeMap, } impl Default for MinerConfig { @@ -2194,6 +2198,14 @@ impl Default for MinerConfig { ), tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS), tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS), + block_rejection_timeout_steps: { + let mut timeouts_btree = BTreeMap::::new(); + timeouts_btree.insert(0, Duration::from_secs(600)); + timeouts_btree.insert(1, Duration::from_secs(300)); + timeouts_btree.insert(2, Duration::from_secs(150)); + timeouts_btree.insert(3, Duration::from_secs(0)); + timeouts_btree + }, } } } @@ -2590,6 +2602,7 @@ pub struct MinerConfigFile { pub tenure_cost_limit_per_block_percentage: Option, pub tenure_extend_poll_secs: Option, pub tenure_timeout_secs: Option, + pub block_rejection_timeout_steps: Option>, } impl MinerConfigFile { @@ -2732,6 +2745,20 @@ impl MinerConfigFile { tenure_cost_limit_per_block_percentage, tenure_extend_poll_secs: self.tenure_extend_poll_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_poll_secs), tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout), + block_rejection_timeout_steps: { + if let Some(block_rejection_timeout_items) = self.block_rejection_timeout_steps { + let mut timeouts_btree = BTreeMap::::new(); + for (slice, millis) in block_rejection_timeout_items.iter() { + match slice.parse::() { + Ok(slice_slot) => timeouts_btree.insert(slice_slot, Duration::from_millis(*millis)), + Err(e) => panic!("block_rejection_timeout_items keys must be unsigned integers: {}", e) + }; + } + timeouts_btree + } else{ + miner_default_config.block_rejection_timeout_steps + } + } }) } } diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index ce136a05a2..686d62a2d2 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -38,6 +38,8 @@ use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; #[cfg(test)] use stacks_common::util::tests::TestFlag; +use std::collections::BTreeMap; +use std::ops::Bound::Included; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; @@ -53,9 +55,6 @@ use crate::Config; pub static BLOCK_REJECTIONS_CURRENT_TIMEOUT: LazyLock> = LazyLock::new(TestFlag::default); -/// Base timeout for rejections heuristic -pub static BLOCK_REJECTIONS_TIMEOUT_BASE: u64 = 600; - /// The state of the signer database listener, used by the miner thread to /// interact with the signer listener. pub struct SignerCoordinator { @@ -81,6 +80,8 @@ pub struct SignerCoordinator { /// Rather, this burn block is used to determine whether or not a new /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, + /// + block_rejection_timeout_steps: BTreeMap, } impl SignerCoordinator { @@ -126,6 +127,7 @@ impl SignerCoordinator { keep_running, listener_thread: None, burn_tip_at_start: burn_tip_at_start.clone(), + block_rejection_timeout_steps: config.miner.block_rejection_timeout_steps.clone(), }; // Spawn the signer DB listener thread @@ -311,9 +313,17 @@ impl SignerCoordinator { // this is used to track the start of the waiting cycle let mut rejections_timer = Instant::now(); // the amount of current rejections to eventually modify the timeout - let mut rejections: u32 = 0; + let mut rejections: u64 = 0; // default timeout - let mut rejections_timeout = Duration::from_secs(BLOCK_REJECTIONS_TIMEOUT_BASE); + let mut rejections_timeout = self + .block_rejection_timeout_steps + .range((Included(0), Included(rejections))) + .last() + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); loop { @@ -324,7 +334,7 @@ impl SignerCoordinator { block_signer_sighash, &mut block_status_tracker, rejections_timer, - rejections_timeout, + *rejections_timeout.1, EVENT_RECEIVER_POLL, )? { Some(status) => status, @@ -358,10 +368,10 @@ impl SignerCoordinator { return Err(NakamotoNodeError::BurnchainTipChanged); } - if rejections_timer.elapsed() > rejections_timeout { + if rejections_timer.elapsed() > *rejections_timeout.1 { warn!("Timed out while waiting for responses from signers"; "elapsed" => rejections_timer.elapsed().as_secs(), - "rejections_timeout" => rejections_timeout.as_secs(), + "rejections_timeout" => rejections_timeout.1.as_secs(), "rejections" => rejections, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); @@ -374,17 +384,21 @@ impl SignerCoordinator { } }; - if rejections != block_status.total_reject_weight { + if rejections != block_status.total_reject_weight as u64 { rejections_timer = Instant::now(); - rejections = block_status.total_reject_weight; - rejections_timeout = Duration::from_secs_f32( - BLOCK_REJECTIONS_TIMEOUT_BASE as f32 - - (BLOCK_REJECTIONS_TIMEOUT_BASE as f32 - * ((rejections as f32 / self.weight_threshold as f32).powf(2.0))), - ); + rejections = block_status.total_reject_weight as u64; + rejections_timeout = self + .block_rejection_timeout_steps + .range((Included(0), Included((rejections as f64 / self.total_weight as f64) as u64))) + .last() + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; #[cfg(test)] - BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(rejections_timeout); + BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(*rejections_timeout.1); } if block_status @@ -404,10 +418,10 @@ impl SignerCoordinator { "block_signer_sighash" => %block_signer_sighash, ); return Ok(block_status.gathered_signatures.values().cloned().collect()); - } else if rejections_timer.elapsed() > rejections_timeout { + } else if rejections_timer.elapsed() > *rejections_timeout.1 { warn!("Timed out while waiting for responses from signers"; "elapsed" => rejections_timer.elapsed().as_secs(), - "rejections_timeout" => rejections_timeout.as_secs(), + "rejections_timeout" => rejections_timeout.1.as_secs(), "rejections" => rejections, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index ddde8bd5da..ba10f9e792 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7826,7 +7826,11 @@ fn block_validation_check_rejection_timeout_heuristic() { |config| { config.block_proposal_validation_timeout = timeout; }, - |_| {}, + |config| { + config.miner.block_rejection_timeout_steps.clear(); + config.miner.block_rejection_timeout_steps.insert(0, Duration::from_secs(600)); + config.miner.block_rejection_timeout_steps.insert(3, Duration::from_secs(17)); + }, None, None, ); From 3071d3601f951bd9f4e0ea14944e38fd1e840ca3 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 15:37:42 +0000 Subject: [PATCH 12/38] improved config system --- .github/workflows/bitcoin-tests.yml | 1 + stackslib/src/config/mod.rs | 25 +++-- .../src/nakamoto_node/signer_coordinator.rs | 48 +++++--- testnet/stacks-node/src/tests/signer/v0.rs | 105 +++++++++++++++--- 4 files changed, 133 insertions(+), 46 deletions(-) diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index bb64a1a8b7..41fb60d2e8 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -133,6 +133,7 @@ jobs: - tests::signer::v0::block_commit_delay - tests::signer::v0::continue_after_fast_block_no_sortition - tests::signer::v0::block_validation_response_timeout + - tests::signer::v0::block_validation_check_rejection_timeout_heuristic - tests::signer::v0::block_validation_pending_table - tests::signer::v0::new_tenure_while_validating_previous_scenario - tests::signer::v0::tenure_extend_after_bad_commit diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index b26676294f..2982273825 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -2158,8 +2158,8 @@ pub struct MinerConfig { pub tenure_extend_poll_secs: Duration, /// Duration to wait before attempting to issue a tenure extend pub tenure_timeout: Duration, - /// - pub block_rejection_timeout_steps: BTreeMap, + /// Define the timeout to apply while waiting for signers responses, based on the amount of rejections + pub block_rejection_timeout_steps: HashMap, } impl Default for MinerConfig { @@ -2199,12 +2199,13 @@ impl Default for MinerConfig { tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS), tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS), block_rejection_timeout_steps: { - let mut timeouts_btree = BTreeMap::::new(); - timeouts_btree.insert(0, Duration::from_secs(600)); - timeouts_btree.insert(1, Duration::from_secs(300)); - timeouts_btree.insert(2, Duration::from_secs(150)); - timeouts_btree.insert(3, Duration::from_secs(0)); - timeouts_btree + let mut rejections_timeouts_default_map = HashMap::::new(); + rejections_timeouts_default_map.insert(0, Duration::from_secs(600)); + rejections_timeouts_default_map.insert(10, Duration::from_secs(300)); + rejections_timeouts_default_map.insert(20, Duration::from_secs(150)); + rejections_timeouts_default_map.insert(30, Duration::from_secs(60)); + rejections_timeouts_default_map.insert(31, Duration::from_secs(0)); + rejections_timeouts_default_map }, } } @@ -2747,14 +2748,14 @@ impl MinerConfigFile { tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout), block_rejection_timeout_steps: { if let Some(block_rejection_timeout_items) = self.block_rejection_timeout_steps { - let mut timeouts_btree = BTreeMap::::new(); - for (slice, millis) in block_rejection_timeout_items.iter() { + let mut rejection_timeout_durations = HashMap::::new(); + for (slice, seconds) in block_rejection_timeout_items.iter() { match slice.parse::() { - Ok(slice_slot) => timeouts_btree.insert(slice_slot, Duration::from_millis(*millis)), + Ok(slice_slot) => rejection_timeout_durations.insert(slice_slot, Duration::from_secs(*seconds)), Err(e) => panic!("block_rejection_timeout_items keys must be unsigned integers: {}", e) }; } - timeouts_btree + rejection_timeout_durations } else{ miner_default_config.block_rejection_timeout_steps } diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 686d62a2d2..d04b306bfd 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -38,7 +38,7 @@ use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; #[cfg(test)] use stacks_common::util::tests::TestFlag; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::ops::Bound::Included; use super::stackerdb_listener::StackerDBListenerComms; @@ -81,7 +81,7 @@ pub struct SignerCoordinator { /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, /// - block_rejection_timeout_steps: BTreeMap, + block_rejection_timeout_steps: HashMap, } impl SignerCoordinator { @@ -310,15 +310,20 @@ impl SignerCoordinator { sortdb: &SortitionDB, counters: &Counters, ) -> Result, NakamotoNodeError> { - // this is used to track the start of the waiting cycle - let mut rejections_timer = Instant::now(); - // the amount of current rejections to eventually modify the timeout + // build a BTreeMap of the various timeout steps + let mut block_rejection_timeout_steps = BTreeMap::::new(); + for (percentage, duration) in self.block_rejection_timeout_steps.iter() { + let rejections_amount = + ((self.total_weight as f64 / 100.0) * *percentage as f64) as u64; + block_rejection_timeout_steps.insert(rejections_amount, *duration); + } + + // the amount of current rejections (used to eventually modify the timeout) let mut rejections: u64 = 0; - // default timeout + // default timeout (the 0 entry must be always present) let mut rejections_timeout = self .block_rejection_timeout_steps - .range((Included(0), Included(rejections))) - .last() + .get(&rejections) .ok_or_else(|| { NakamotoNodeError::SigningCoordinatorFailure( "Invalid rejection timeout step function definition".into(), @@ -326,6 +331,9 @@ impl SignerCoordinator { })?; // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); + + // this is used to track the start of the waiting cycle + let mut rejections_timer = Instant::now(); loop { // At every iteration wait for the block_status. // Exit when the amount of confirmations/rejections reaches the threshold (or until timeout) @@ -334,7 +342,7 @@ impl SignerCoordinator { block_signer_sighash, &mut block_status_tracker, rejections_timer, - *rejections_timeout.1, + *rejections_timeout, EVENT_RECEIVER_POLL, )? { Some(status) => status, @@ -368,10 +376,10 @@ impl SignerCoordinator { return Err(NakamotoNodeError::BurnchainTipChanged); } - if rejections_timer.elapsed() > *rejections_timeout.1 { + if rejections_timer.elapsed() > *rejections_timeout { warn!("Timed out while waiting for responses from signers"; "elapsed" => rejections_timer.elapsed().as_secs(), - "rejections_timeout" => rejections_timeout.1.as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), "rejections" => rejections, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); @@ -385,20 +393,24 @@ impl SignerCoordinator { }; if rejections != block_status.total_reject_weight as u64 { - rejections_timer = Instant::now(); rejections = block_status.total_reject_weight as u64; - rejections_timeout = self - .block_rejection_timeout_steps - .range((Included(0), Included((rejections as f64 / self.total_weight as f64) as u64))) + let rejections_timeout_tuple = block_rejection_timeout_steps + .range((Included(0), Included(rejections))) .last() .ok_or_else(|| { NakamotoNodeError::SigningCoordinatorFailure( "Invalid rejection timeout step function definition".into(), ) })?; + rejections_timeout = rejections_timeout_tuple.1; + info!("Number of received rejections updated, resetting timeout"; + "rejections" => rejections, + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); + rejections_timer = Instant::now(); #[cfg(test)] - BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(*rejections_timeout.1); + BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(*rejections_timeout); } if block_status @@ -418,10 +430,10 @@ impl SignerCoordinator { "block_signer_sighash" => %block_signer_sighash, ); return Ok(block_status.gathered_signatures.values().cloned().collect()); - } else if rejections_timer.elapsed() > *rejections_timeout.1 { + } else if rejections_timer.elapsed() > *rejections_timeout { warn!("Timed out while waiting for responses from signers"; "elapsed" => rejections_timer.elapsed().as_secs(), - "rejections_timeout" => rejections_timeout.1.as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), "rejections" => rejections, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index ba10f9e792..b603a887a6 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7813,7 +7813,7 @@ fn block_validation_check_rejection_timeout_heuristic() { } info!("------------------------- Test Setup -------------------------"); - let num_signers = 5; + let num_signers = 20; let timeout = Duration::from_secs(30); let sender_sk = Secp256k1PrivateKey::new(); let sender_addr = tests::to_addr(&sender_sk); @@ -7828,8 +7828,22 @@ fn block_validation_check_rejection_timeout_heuristic() { }, |config| { config.miner.block_rejection_timeout_steps.clear(); - config.miner.block_rejection_timeout_steps.insert(0, Duration::from_secs(600)); - config.miner.block_rejection_timeout_steps.insert(3, Duration::from_secs(17)); + config + .miner + .block_rejection_timeout_steps + .insert(0, Duration::from_secs(123)); + config + .miner + .block_rejection_timeout_steps + .insert(10, Duration::from_secs(20)); + config + .miner + .block_rejection_timeout_steps + .insert(15, Duration::from_secs(10)); + config + .miner + .block_rejection_timeout_steps + .insert(20, Duration::from_secs(99)); }, None, None, @@ -7849,13 +7863,8 @@ fn block_validation_check_rejection_timeout_heuristic() { let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[4]]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![ - all_signers[0], - all_signers[1], - all_signers[2], - all_signers[3], - ]); + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[19]]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..19].to_vec()); next_block_and( &mut signer_test.running_nodes.btc_regtest_controller, @@ -7865,17 +7874,19 @@ fn block_validation_check_rejection_timeout_heuristic() { .unwrap(); signer_test - .wait_for_block_rejections(timeout.as_secs(), &[all_signers[4]]) + .wait_for_block_rejections(timeout.as_secs(), &[all_signers[19]]) .unwrap(); - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 551); + thread::sleep(Duration::from_secs(3)); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 123); info!("------------------------- Check Rejections-based timeout with 2 rejections -------------------------"); let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[3], all_signers[4]]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![all_signers[0], all_signers[1], all_signers[2]]); + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[18], all_signers[19]]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..18].to_vec()); next_block_and( &mut signer_test.running_nodes.btc_regtest_controller, @@ -7885,10 +7896,72 @@ fn block_validation_check_rejection_timeout_heuristic() { .unwrap(); signer_test - .wait_for_block_rejections(timeout.as_secs(), &[all_signers[3], all_signers[4]]) + .wait_for_block_rejections(timeout.as_secs(), &[all_signers[18], all_signers[19]]) .unwrap(); - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 404); + thread::sleep(Duration::from_secs(3)); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 20); + + info!("------------------------- Check Rejections-based timeout with 3 rejections -------------------------"); + + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[17], all_signers[18], all_signers[19]]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..17].to_vec()); + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), + ) + .unwrap(); + + signer_test + .wait_for_block_rejections( + timeout.as_secs(), + &[all_signers[17], all_signers[18], all_signers[19]], + ) + .unwrap(); + + thread::sleep(Duration::from_secs(3)); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 10); + + info!("------------------------- Check Rejections-based timeout with 4 rejections -------------------------"); + + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![ + all_signers[16], + all_signers[17], + all_signers[18], + all_signers[19], + ]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..16].to_vec()); + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), + ) + .unwrap(); + + signer_test + .wait_for_block_rejections( + timeout.as_secs(), + &[ + all_signers[16], + all_signers[17], + all_signers[18], + all_signers[19], + ], + ) + .unwrap(); + + thread::sleep(Duration::from_secs(3)); + + assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 99); // reset reject/ignore TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); From f2ab500150f6fd87a760cb6399a1076f495eeb2b Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 15:51:52 +0000 Subject: [PATCH 13/38] fmt-stacks --- stackslib/src/config/mod.rs | 4 +--- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 2982273825..0e30de5998 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -16,7 +16,7 @@ pub mod chain_data; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::net::{Ipv4Addr, SocketAddr, ToSocketAddrs}; use std::path::PathBuf; use std::str::FromStr; @@ -36,8 +36,6 @@ use stacks_common::util::get_epoch_time_ms; use stacks_common::util::hash::hex_bytes; use stacks_common::util::secp256k1::{Secp256k1PrivateKey, Secp256k1PublicKey}; -use std::collections::BTreeMap; - use crate::burnchains::affirmation::AffirmationMap; use crate::burnchains::bitcoin::BitcoinNetworkType; use crate::burnchains::{Burnchain, MagicBytes, PoxConstants, BLOCKSTACK_MAGIC_MAINNET}; diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index d04b306bfd..1e994b89af 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -13,6 +13,8 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . +use std::collections::{BTreeMap, HashMap}; +use std::ops::Bound::Included; use std::sync::atomic::AtomicBool; #[cfg(test)] use std::sync::LazyLock; @@ -38,8 +40,6 @@ use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; #[cfg(test)] use stacks_common::util::tests::TestFlag; -use std::collections::{BTreeMap, HashMap}; -use std::ops::Bound::Included; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; From 5c0805a73a7e55a187f9911745b7a335ce0d13ef Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 15:58:01 +0000 Subject: [PATCH 14/38] fixed default rejections timeout --- stackslib/src/config/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 0e30de5998..dc79732486 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -2201,8 +2201,7 @@ impl Default for MinerConfig { rejections_timeouts_default_map.insert(0, Duration::from_secs(600)); rejections_timeouts_default_map.insert(10, Duration::from_secs(300)); rejections_timeouts_default_map.insert(20, Duration::from_secs(150)); - rejections_timeouts_default_map.insert(30, Duration::from_secs(60)); - rejections_timeouts_default_map.insert(31, Duration::from_secs(0)); + rejections_timeouts_default_map.insert(30, Duration::from_secs(0)); rejections_timeouts_default_map }, } From f22a9907ba62a76c5a807b677da6a396f1ed4eed Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 15:59:32 +0000 Subject: [PATCH 15/38] improved comment for rejections timeout --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 1e994b89af..a62efb9570 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -80,7 +80,7 @@ pub struct SignerCoordinator { /// Rather, this burn block is used to determine whether or not a new /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, - /// + /// The tiemout configuration based on the percentage of rejections block_rejection_timeout_steps: HashMap, } From 4f4b58481dc43ba540e87b4d66376323cb583531 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 16:05:18 +0000 Subject: [PATCH 16/38] fixed typo --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index a62efb9570..32c428534f 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -80,7 +80,7 @@ pub struct SignerCoordinator { /// Rather, this burn block is used to determine whether or not a new /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, - /// The tiemout configuration based on the percentage of rejections + /// The timeout configuration based on the percentage of rejections block_rejection_timeout_steps: HashMap, } From 463839f3f8a0ab12ffed52673522ec0f288801bd Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 16:08:24 +0000 Subject: [PATCH 17/38] ensure 0 key is specified for rejections timeout --- stackslib/src/config/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index dc79732486..7897730d70 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -2749,9 +2749,12 @@ impl MinerConfigFile { for (slice, seconds) in block_rejection_timeout_items.iter() { match slice.parse::() { Ok(slice_slot) => rejection_timeout_durations.insert(slice_slot, Duration::from_secs(*seconds)), - Err(e) => panic!("block_rejection_timeout_items keys must be unsigned integers: {}", e) + Err(e) => panic!("block_rejection_timeout_steps keys must be unsigned integers: {}", e) }; } + if !rejection_timeout_durations.contains_key(&0) { + panic!("block_rejection_timeout_steps requires a definition for the '0' key/step"); + } rejection_timeout_durations } else{ miner_default_config.block_rejection_timeout_steps From 0e7461c8c24455028841c4d19198117b97fa707d Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Wed, 29 Jan 2025 16:23:48 +0000 Subject: [PATCH 18/38] message typo --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 32c428534f..096f9eb74e 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -384,7 +384,7 @@ impl SignerCoordinator { "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Gave up while tried reaching the threshold".into(), + "Gave up while trying reaching the threshold".into(), )); } @@ -438,7 +438,7 @@ impl SignerCoordinator { "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Gave up while tried reaching the threshold".into(), + "Gave up while trying reaching the threshold".into(), )); } else { continue; From 420f861f7e6acc8cd9bd2d07899254198ec16a14 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 10:22:19 +0000 Subject: [PATCH 19/38] use Counters instead of global test vars --- .../src/nakamoto_node/signer_coordinator.rs | 17 ++-- testnet/stacks-node/src/run_loop/neon.rs | 7 +- testnet/stacks-node/src/tests/signer/mod.rs | 5 ++ testnet/stacks-node/src/tests/signer/v0.rs | 80 ++++++++++++++++--- 4 files changed, 86 insertions(+), 23 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 096f9eb74e..4132bb4284 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -16,8 +16,6 @@ use std::collections::{BTreeMap, HashMap}; use std::ops::Bound::Included; use std::sync::atomic::AtomicBool; -#[cfg(test)] -use std::sync::LazyLock; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; use std::time::{Duration, Instant}; @@ -38,8 +36,6 @@ use stacks::types::chainstate::{StacksBlockId, StacksPrivateKey}; use stacks::util::hash::Sha512Trunc256Sum; use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; -#[cfg(test)] -use stacks_common::util::tests::TestFlag; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; @@ -50,11 +46,6 @@ use crate::nakamoto_node::stackerdb_listener::{ use crate::neon::Counters; use crate::Config; -#[cfg(test)] -/// Test-only value for storing the current rejection based timeout -pub static BLOCK_REJECTIONS_CURRENT_TIMEOUT: LazyLock> = - LazyLock::new(TestFlag::default); - /// The state of the signer database listener, used by the miner thread to /// interact with the signer listener. pub struct SignerCoordinator { @@ -410,7 +401,13 @@ impl SignerCoordinator { rejections_timer = Instant::now(); #[cfg(test)] - BLOCK_REJECTIONS_CURRENT_TIMEOUT.set(*rejections_timeout); + { + Counters::set( + &counters.naka_miner_current_rejections_timeout_secs, + rejections_timeout.as_secs(), + ); + Counters::set(&counters.naka_miner_current_rejections, rejections); + } } if block_status diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index deead51066..174b4d40a6 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -115,6 +115,11 @@ pub struct Counters { pub naka_signer_pushed_blocks: RunLoopCounter, pub naka_miner_directives: RunLoopCounter, + #[cfg(test)] + pub naka_miner_current_rejections: RunLoopCounter, + #[cfg(test)] + pub naka_miner_current_rejections_timeout_secs: RunLoopCounter, + #[cfg(test)] pub naka_skip_commit_op: TestFlag, } @@ -133,7 +138,7 @@ impl Counters { fn inc(_ctr: &RunLoopCounter) {} #[cfg(test)] - fn set(ctr: &RunLoopCounter, value: u64) { + pub fn set(ctr: &RunLoopCounter, value: u64) { ctr.0.store(value, Ordering::SeqCst); } diff --git a/testnet/stacks-node/src/tests/signer/mod.rs b/testnet/stacks-node/src/tests/signer/mod.rs index 47958e8690..67ad402a86 100644 --- a/testnet/stacks-node/src/tests/signer/mod.rs +++ b/testnet/stacks-node/src/tests/signer/mod.rs @@ -98,6 +98,7 @@ pub struct RunningNodes { pub nakamoto_test_skip_commit_op: TestFlag, pub coord_channel: Arc>, pub conf: NeonConfig, + pub counters: Counters, } /// A test harness for running a v0 or v1 signer integration test @@ -943,6 +944,9 @@ fn setup_stx_btc_node( } = run_loop.counters(); let coord_channel = run_loop.coordinator_channels(); + + let run_loop_counters = run_loop.counters(); + let run_loop_thread = thread::spawn(move || run_loop.start(None, 0)); // Give the run loop some time to start up! @@ -978,5 +982,6 @@ fn setup_stx_btc_node( nakamoto_miner_directives: naka_miner_directives.0, coord_channel, conf: naka_conf, + counters: run_loop_counters, } } diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index b603a887a6..ec818d34a0 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -76,7 +76,6 @@ use crate::event_dispatcher::{MinedNakamotoBlockEvent, TEST_SKIP_BLOCK_ANNOUNCEM use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, }; -use crate::nakamoto_node::signer_coordinator::BLOCK_REJECTIONS_CURRENT_TIMEOUT; use crate::nakamoto_node::stackerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; use crate::run_loop::boot_nakamoto; @@ -7877,9 +7876,23 @@ fn block_validation_check_rejection_timeout_heuristic() { .wait_for_block_rejections(timeout.as_secs(), &[all_signers[19]]) .unwrap(); - thread::sleep(Duration::from_secs(3)); - - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 123); + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= 1) + }) + .unwrap(); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + 123 + ); info!("------------------------- Check Rejections-based timeout with 2 rejections -------------------------"); @@ -7899,9 +7912,23 @@ fn block_validation_check_rejection_timeout_heuristic() { .wait_for_block_rejections(timeout.as_secs(), &[all_signers[18], all_signers[19]]) .unwrap(); - thread::sleep(Duration::from_secs(3)); - - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 20); + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= 2) + }) + .unwrap(); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + 20 + ); info!("------------------------- Check Rejections-based timeout with 3 rejections -------------------------"); @@ -7924,9 +7951,24 @@ fn block_validation_check_rejection_timeout_heuristic() { ) .unwrap(); - thread::sleep(Duration::from_secs(3)); + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= 3) + }) + .unwrap(); - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 10); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + 10 + ); info!("------------------------- Check Rejections-based timeout with 4 rejections -------------------------"); @@ -7959,9 +8001,23 @@ fn block_validation_check_rejection_timeout_heuristic() { ) .unwrap(); - thread::sleep(Duration::from_secs(3)); - - assert_eq!(BLOCK_REJECTIONS_CURRENT_TIMEOUT.get().as_secs(), 99); + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= 4) + }) + .unwrap(); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + 99 + ); // reset reject/ignore TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); From 57f80fb79785e0994e565fb954e3ce628a9b3378 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 10:57:46 +0000 Subject: [PATCH 20/38] restored original wait_for_block_status --- .../src/nakamoto_node/signer_coordinator.rs | 18 ++++++++++--- .../src/nakamoto_node/stackerdb_listener.rs | 26 +++++++++---------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 4132bb4284..8a05600771 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -331,12 +331,22 @@ impl SignerCoordinator { // Based on the amount of rejections, eventually modify the timeout. let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, - &mut block_status_tracker, - rejections_timer, - *rejections_timeout, EVENT_RECEIVER_POLL, + |status| { + if rejections_timer.elapsed() > *rejections_timeout { + return false; + } + if *status != block_status_tracker { + return false; + } + return true; + }, )? { - Some(status) => status, + Some(status) => { + // keep track of the last status + block_status_tracker = status.clone(); + status + } None => { // If we just received a timeout, we should check if the burnchain // tip has changed or if we received this signed block already in diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 92688c0075..5ac31c60d1 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -480,32 +480,30 @@ impl StackerDBListenerComms { /// Get the status for `block` from the Stacker DB listener. /// If the block is not found in the map, return an error. - /// If the block is found, return it. + /// If the block is found, call `condition` to check if the block status + /// satisfies the condition. + /// If the condition is satisfied, return the block status as + /// `Ok(Some(status))`. + /// If the condition is not satisfied, wait for it to be satisfied. /// If the timeout is reached, return `Ok(None)`. - pub fn wait_for_block_status( + pub fn wait_for_block_status( &self, block_signer_sighash: &Sha512Trunc256Sum, - block_status_tracker: &mut BlockStatus, - rejections_timer: std::time::Instant, - rejections_timeout: Duration, timeout: Duration, - ) -> Result, NakamotoNodeError> { + condition: F, + ) -> Result, NakamotoNodeError> + where + F: Fn(&BlockStatus) -> bool, + { let (lock, cvar) = &*self.blocks; let blocks = lock.lock().expect("FATAL: failed to lock block status"); let (guard, timeout_result) = cvar .wait_timeout_while(blocks, timeout, |map| { - if rejections_timer.elapsed() > rejections_timeout { - return true; - } let Some(status) = map.get(block_signer_sighash) else { return true; }; - if status != block_status_tracker { - *block_status_tracker = status.clone(); - return false; - } - return true; + condition(status) }) .expect("FATAL: failed to wait on block status cond var"); From d5abfdc0daa73ba8aaf9b67b1bb61c9f0cc17cd4 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 11:00:53 +0000 Subject: [PATCH 21/38] removed empty line --- testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 5ac31c60d1..ddaa14a8fd 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -376,7 +376,6 @@ impl StackerDBListener { } }; block.responded_signers.insert(rejected_pubkey); - block.total_reject_weight = block .total_reject_weight .checked_add(signer_entry.weight) From 5269714c1438257d9a32aa57f2c84542864f3fed Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 11:06:20 +0000 Subject: [PATCH 22/38] updated CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 226f7b5159..7d9ecb3b16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed - Miner will include other transactions in blocks with tenure extend transactions (#5760) +- Add `block_rejection_timeout_steps` to miner configuration for defining rejections-based timeouts while waiting for signers response ## [3.1.0.0.4] From 5a70c4397446a873522a2f3bb2bfbc8b389a6f93 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 11:07:00 +0000 Subject: [PATCH 23/38] updated CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d9ecb3b16..57f940fa31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed - Miner will include other transactions in blocks with tenure extend transactions (#5760) -- Add `block_rejection_timeout_steps` to miner configuration for defining rejections-based timeouts while waiting for signers response +- Add `block_rejection_timeout_steps` to miner configuration for defining rejections-based timeouts while waiting for signers response (#5705) ## [3.1.0.0.4] From c424212db56a076fa482dabf920c45b4a10a6f30 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 11:13:17 +0000 Subject: [PATCH 24/38] Secp256k1PrivateKey::random() instrad of new() --- testnet/stacks-node/src/tests/signer/v0.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 2c483c523c..2c98c11e67 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7899,7 +7899,7 @@ fn block_validation_check_rejection_timeout_heuristic() { info!("------------------------- Test Setup -------------------------"); let num_signers = 20; let timeout = Duration::from_secs(30); - let sender_sk = Secp256k1PrivateKey::new(); + let sender_sk = Secp256k1PrivateKey::random(); let sender_addr = tests::to_addr(&sender_sk); let send_amt = 100; let send_fee = 180; From d71329003e3e697d2e36e9b279bef2d89954f8db Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 15:30:52 +0000 Subject: [PATCH 25/38] removed useless test attributes --- .../src/nakamoto_node/signer_coordinator.rs | 13 +++++-------- testnet/stacks-node/src/run_loop/neon.rs | 4 +--- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 2de14708f2..ae68c22cc5 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -410,14 +410,11 @@ impl SignerCoordinator { "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); rejections_timer = Instant::now(); - #[cfg(test)] - { - Counters::set( - &counters.naka_miner_current_rejections_timeout_secs, - rejections_timeout.as_secs(), - ); - Counters::set(&counters.naka_miner_current_rejections, rejections); - } + Counters::set( + &counters.naka_miner_current_rejections_timeout_secs, + rejections_timeout.as_secs(), + ); + Counters::set(&counters.naka_miner_current_rejections, rejections); } if block_status diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 89973aa9aa..01813b6812 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -117,9 +117,7 @@ pub struct Counters { pub naka_signer_pushed_blocks: RunLoopCounter, pub naka_miner_directives: RunLoopCounter, - #[cfg(test)] pub naka_miner_current_rejections: RunLoopCounter, - #[cfg(test)] pub naka_miner_current_rejections_timeout_secs: RunLoopCounter, #[cfg(test)] @@ -145,7 +143,7 @@ impl Counters { } #[cfg(not(test))] - fn set(_ctr: &RunLoopCounter, _value: u64) {} + pub fn set(_ctr: &RunLoopCounter, _value: u64) {} pub fn bump_blocks_processed(&self) { Counters::inc(&self.blocks_processed); From adaabe60570acb199e577c11c633eb97078af115 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 15:47:49 +0000 Subject: [PATCH 26/38] use u32 for block_rejection_timeout_steps keys --- stackslib/src/config/mod.rs | 8 ++++---- .../src/nakamoto_node/signer_coordinator.rs | 20 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 3fa2d3a005..b68e55bd03 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -2156,7 +2156,7 @@ pub struct MinerConfig { /// Duration to wait before attempting to issue a tenure extend pub tenure_timeout: Duration, /// Define the timeout to apply while waiting for signers responses, based on the amount of rejections - pub block_rejection_timeout_steps: HashMap, + pub block_rejection_timeout_steps: HashMap, } impl Default for MinerConfig { @@ -2196,7 +2196,7 @@ impl Default for MinerConfig { tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS), tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS), block_rejection_timeout_steps: { - let mut rejections_timeouts_default_map = HashMap::::new(); + let mut rejections_timeouts_default_map = HashMap::::new(); rejections_timeouts_default_map.insert(0, Duration::from_secs(600)); rejections_timeouts_default_map.insert(10, Duration::from_secs(300)); rejections_timeouts_default_map.insert(20, Duration::from_secs(150)); @@ -2744,9 +2744,9 @@ impl MinerConfigFile { tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout), block_rejection_timeout_steps: { if let Some(block_rejection_timeout_items) = self.block_rejection_timeout_steps { - let mut rejection_timeout_durations = HashMap::::new(); + let mut rejection_timeout_durations = HashMap::::new(); for (slice, seconds) in block_rejection_timeout_items.iter() { - match slice.parse::() { + match slice.parse::() { Ok(slice_slot) => rejection_timeout_durations.insert(slice_slot, Duration::from_secs(*seconds)), Err(e) => panic!("block_rejection_timeout_steps keys must be unsigned integers: {}", e) }; diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index ae68c22cc5..1e2dba7f05 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -72,7 +72,7 @@ pub struct SignerCoordinator { /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, /// The timeout configuration based on the percentage of rejections - block_rejection_timeout_steps: HashMap, + block_rejection_timeout_steps: HashMap, } impl SignerCoordinator { @@ -305,21 +305,21 @@ impl SignerCoordinator { let mut block_rejection_timeout_steps = BTreeMap::::new(); for (percentage, duration) in self.block_rejection_timeout_steps.iter() { let rejections_amount = - ((self.total_weight as f64 / 100.0) * *percentage as f64) as u64; + ((f64::from(self.total_weight) / 100.0) * f64::from(*percentage)) as u64; block_rejection_timeout_steps.insert(rejections_amount, *duration); } // the amount of current rejections (used to eventually modify the timeout) let mut rejections: u64 = 0; // default timeout (the 0 entry must be always present) - let mut rejections_timeout = self - .block_rejection_timeout_steps - .get(&rejections) - .ok_or_else(|| { - NakamotoNodeError::SigningCoordinatorFailure( - "Invalid rejection timeout step function definition".into(), - ) - })?; + let mut rejections_timeout = + block_rejection_timeout_steps + .get(&rejections) + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); From 7cfbfddb8b0034e40b9fd4edf766655d76260868 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 15:54:04 +0000 Subject: [PATCH 27/38] refactored block_rejection_timeout_steps percentage setup --- .../src/nakamoto_node/signer_coordinator.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 1e2dba7f05..dfddd0a7b2 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -279,12 +279,21 @@ impl SignerCoordinator { } } + // build a BTreeMap of the various timeout steps + let mut block_rejection_timeout_steps = BTreeMap::::new(); + for (percentage, duration) in self.block_rejection_timeout_steps.iter() { + let rejections_amount = + ((f64::from(self.total_weight) / 100.0) * f64::from(*percentage)) as u64; + block_rejection_timeout_steps.insert(rejections_amount, *duration); + } + self.get_block_status( &block.header.signer_signature_hash(), &block.block_id(), chain_state, sortdb, counters, + &block_rejection_timeout_steps, ) } @@ -300,15 +309,8 @@ impl SignerCoordinator { chain_state: &mut StacksChainState, sortdb: &SortitionDB, counters: &Counters, + block_rejection_timeout_steps: &BTreeMap, ) -> Result, NakamotoNodeError> { - // build a BTreeMap of the various timeout steps - let mut block_rejection_timeout_steps = BTreeMap::::new(); - for (percentage, duration) in self.block_rejection_timeout_steps.iter() { - let rejections_amount = - ((f64::from(self.total_weight) / 100.0) * f64::from(*percentage)) as u64; - block_rejection_timeout_steps.insert(rejections_amount, *duration); - } - // the amount of current rejections (used to eventually modify the timeout) let mut rejections: u64 = 0; // default timeout (the 0 entry must be always present) From d4171d7cce23cee936319a97fe40b965b90470c4 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 16:05:10 +0000 Subject: [PATCH 28/38] do not reset rejections_timer on block_status updates --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index dfddd0a7b2..eb213710e2 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -326,7 +326,7 @@ impl SignerCoordinator { let mut block_status_tracker = BlockStatus::default(); // this is used to track the start of the waiting cycle - let mut rejections_timer = Instant::now(); + let rejections_timer = Instant::now(); loop { // At every iteration wait for the block_status. // Exit when the amount of confirmations/rejections reaches the threshold (or until timeout) @@ -410,7 +410,6 @@ impl SignerCoordinator { "rejections" => rejections, "rejections_timeout" => rejections_timeout.as_secs(), "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); - rejections_timer = Instant::now(); Counters::set( &counters.naka_miner_current_rejections_timeout_secs, From fbc3b7d34124acbc2b3d9ced95dacf5a475c22b5 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Thu, 30 Jan 2025 17:38:08 +0000 Subject: [PATCH 29/38] moved block_rejection_timeout_steps computation directly in SignerCoordinator::new() --- .../src/nakamoto_node/signer_coordinator.rs | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index eb213710e2..1fdb609ad1 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -13,7 +13,7 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::ops::Bound::Included; use std::sync::atomic::AtomicBool; use std::sync::{Arc, Mutex}; @@ -72,7 +72,7 @@ pub struct SignerCoordinator { /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, /// The timeout configuration based on the percentage of rejections - block_rejection_timeout_steps: HashMap, + block_rejection_timeout_steps: BTreeMap, } impl SignerCoordinator { @@ -108,6 +108,14 @@ impl SignerCoordinator { let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); + // build a BTreeMap of the various timeout steps + let mut block_rejection_timeout_steps = BTreeMap::::new(); + for (percentage, duration) in config.miner.block_rejection_timeout_steps.iter() { + let rejections_amount = + ((f64::from(listener.total_weight) / 100.0) * f64::from(*percentage)) as u64; + block_rejection_timeout_steps.insert(rejections_amount, *duration); + } + let mut sc = Self { message_key, is_mainnet, @@ -118,7 +126,7 @@ impl SignerCoordinator { keep_running, listener_thread: None, burn_tip_at_start: burn_tip_at_start.clone(), - block_rejection_timeout_steps: config.miner.block_rejection_timeout_steps.clone(), + block_rejection_timeout_steps, }; // Spawn the signer DB listener thread @@ -279,21 +287,12 @@ impl SignerCoordinator { } } - // build a BTreeMap of the various timeout steps - let mut block_rejection_timeout_steps = BTreeMap::::new(); - for (percentage, duration) in self.block_rejection_timeout_steps.iter() { - let rejections_amount = - ((f64::from(self.total_weight) / 100.0) * f64::from(*percentage)) as u64; - block_rejection_timeout_steps.insert(rejections_amount, *duration); - } - self.get_block_status( &block.header.signer_signature_hash(), &block.block_id(), chain_state, sortdb, counters, - &block_rejection_timeout_steps, ) } @@ -309,19 +308,18 @@ impl SignerCoordinator { chain_state: &mut StacksChainState, sortdb: &SortitionDB, counters: &Counters, - block_rejection_timeout_steps: &BTreeMap, ) -> Result, NakamotoNodeError> { // the amount of current rejections (used to eventually modify the timeout) let mut rejections: u64 = 0; // default timeout (the 0 entry must be always present) - let mut rejections_timeout = - block_rejection_timeout_steps - .get(&rejections) - .ok_or_else(|| { - NakamotoNodeError::SigningCoordinatorFailure( - "Invalid rejection timeout step function definition".into(), - ) - })?; + let mut rejections_timeout = self + .block_rejection_timeout_steps + .get(&rejections) + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; // this is used for comparing block_status to identify if it has been changed from the previous event let mut block_status_tracker = BlockStatus::default(); @@ -397,7 +395,8 @@ impl SignerCoordinator { if rejections != block_status.total_reject_weight as u64 { rejections = block_status.total_reject_weight as u64; - let rejections_timeout_tuple = block_rejection_timeout_steps + let rejections_timeout_tuple = self + .block_rejection_timeout_steps .range((Included(0), Included(rejections))) .last() .ok_or_else(|| { From 790c1e1db2bc85accccb4ba0b1fa500de2a30fdb Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 31 Jan 2025 10:39:36 +0000 Subject: [PATCH 30/38] merged with develop, reintroduces old logic for condition variables, improved rejections comparison --- .../src/nakamoto_node/signer_coordinator.rs | 19 +++++++------------ .../src/nakamoto_node/stackerdb_listener.rs | 16 ++++++++++++---- testnet/stacks-node/src/tests/signer/mod.rs | 4 ---- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 1fdb609ad1..ec827033ba 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -40,9 +40,7 @@ use stacks::util_lib::boot::boot_code_id; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::stackerdb_listener::{ - BlockStatus, StackerDBListener, EVENT_RECEIVER_POLL, -}; +use crate::nakamoto_node::stackerdb_listener::{StackerDBListener, EVENT_RECEIVER_POLL}; use crate::neon::Counters; use crate::Config; @@ -320,8 +318,6 @@ impl SignerCoordinator { "Invalid rejection timeout step function definition".into(), ) })?; - // this is used for comparing block_status to identify if it has been changed from the previous event - let mut block_status_tracker = BlockStatus::default(); // this is used to track the start of the waiting cycle let rejections_timer = Instant::now(); @@ -333,20 +329,19 @@ impl SignerCoordinator { block_signer_sighash, EVENT_RECEIVER_POLL, |status| { + // rejections-based timeout expired? if rejections_timer.elapsed() > *rejections_timeout { return false; } - if *status != block_status_tracker { + // number or rejections changed? + if status.total_reject_weight as u64 != rejections { return false; } - return true; + // enough signatures? + return status.total_weight_signed < self.weight_threshold; }, )? { - Some(status) => { - // keep track of the last status - block_status_tracker = status.clone(); - status - } + Some(status) => status, None => { // If we just received a timeout, we should check if the burnchain // tip has changed or if we received this signed block already in diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index ddaa14a8fd..cf9ce4b6d7 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -337,8 +337,10 @@ impl StackerDBListener { block.gathered_signatures.insert(slot_id, signature); block.responded_signers.insert(signer_pubkey); - // Signal to anyone waiting on this block that we have a new status - cvar.notify_all(); + if block.total_weight_signed >= self.weight_threshold { + // Signal to anyone waiting on this block that we have enough signatures + cvar.notify_all(); + } // Update the idle timestamp for this signer self.update_idle_timestamp( @@ -394,8 +396,14 @@ impl StackerDBListener { "server_version" => rejected_data.metadata.server_version, ); - // Signal to anyone waiting on this block that we have a new status - cvar.notify_all(); + if block + .total_reject_weight + .saturating_add(self.weight_threshold) + > self.total_weight + { + // Signal to anyone waiting on this block that we have enough rejections + cvar.notify_all(); + } // Update the idle timestamp for this signer self.update_idle_timestamp( diff --git a/testnet/stacks-node/src/tests/signer/mod.rs b/testnet/stacks-node/src/tests/signer/mod.rs index 836a9eee0a..a68a4c77fb 100644 --- a/testnet/stacks-node/src/tests/signer/mod.rs +++ b/testnet/stacks-node/src/tests/signer/mod.rs @@ -86,7 +86,6 @@ pub struct RunningNodes { pub counters: Counters, pub coord_channel: Arc>, pub conf: NeonConfig, - pub counters: Counters, } /// A test harness for running a v0 or v1 signer integration test @@ -939,8 +938,6 @@ fn setup_stx_btc_node( let coord_channel = run_loop.coordinator_channels(); - let run_loop_counters = run_loop.counters(); - let run_loop_thread = thread::spawn(move || run_loop.start(None, 0)); // Give the run loop some time to start up! @@ -978,6 +975,5 @@ fn setup_stx_btc_node( coord_channel, counters, conf: naka_conf, - counters: run_loop_counters, } } From 54aed16067a48c71dada0cd1fa8067d0866b143a Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 31 Jan 2025 10:41:03 +0000 Subject: [PATCH 31/38] rollback BlockStatus Default and PartialEq --- testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index cf9ce4b6d7..834c59fa95 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -50,7 +50,7 @@ pub static TEST_IGNORE_SIGNERS: LazyLock> = LazyLock::new(TestFla /// waking up to check timeouts? pub static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); -#[derive(Debug, Clone, Default, PartialEq)] +#[derive(Debug, Clone)] pub struct BlockStatus { pub responded_signers: HashSet, pub gathered_signatures: BTreeMap, From eb883ee9eef299631e8adda64d4c5a05de804c5a Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 31 Jan 2025 15:42:24 +0000 Subject: [PATCH 32/38] improved error messages on timeout --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index ec827033ba..4adba66be1 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -380,7 +380,7 @@ impl SignerCoordinator { "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Gave up while trying reaching the threshold".into(), + "Timed out while waiting for signatures".into(), )); } @@ -437,7 +437,7 @@ impl SignerCoordinator { "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) ); return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Gave up while trying reaching the threshold".into(), + "Timed out while waiting for signatures".into(), )); } else { continue; From fe429f472fe3e99b6cf1909fc5574262109fd36c Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 31 Jan 2025 17:56:55 +0000 Subject: [PATCH 33/38] report rejection step in miner log, ensure signe_test shutdown in integration test --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 5 +++-- testnet/stacks-node/src/tests/signer/v0.rs | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 4adba66be1..7a778580d7 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -390,7 +390,7 @@ impl SignerCoordinator { if rejections != block_status.total_reject_weight as u64 { rejections = block_status.total_reject_weight as u64; - let rejections_timeout_tuple = self + let (rejections_step, new_rejections_timeout) = self .block_rejection_timeout_steps .range((Included(0), Included(rejections))) .last() @@ -399,10 +399,11 @@ impl SignerCoordinator { "Invalid rejection timeout step function definition".into(), ) })?; - rejections_timeout = rejections_timeout_tuple.1; + rejections_timeout = new_rejections_timeout; info!("Number of received rejections updated, resetting timeout"; "rejections" => rejections, "rejections_timeout" => rejections_timeout.as_secs(), + "rejections_step" => rejections_step, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); Counters::set( diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 9fa4fac3c0..e055ad82f2 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -8111,6 +8111,9 @@ fn block_validation_check_rejection_timeout_heuristic() { // reset reject/ignore TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![]); + + info!("------------------------- Shutdown -------------------------"); + signer_test.shutdown(); } /// Test scenario: From 6a238a76228c16c3a15e91a6d1352175299b5862 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Fri, 31 Jan 2025 18:09:48 +0000 Subject: [PATCH 34/38] added set_miner_current_rejections_timeout and set_miner_current_rejections to Counters struct --- .../src/nakamoto_node/signer_coordinator.rs | 7 ++----- testnet/stacks-node/src/run_loop/neon.rs | 12 ++++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 7a778580d7..97b22ef68e 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -406,11 +406,8 @@ impl SignerCoordinator { "rejections_step" => rejections_step, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); - Counters::set( - &counters.naka_miner_current_rejections_timeout_secs, - rejections_timeout.as_secs(), - ); - Counters::set(&counters.naka_miner_current_rejections, rejections); + counters.set_miner_current_rejections_timeout(rejections_timeout.as_secs()); + counters.set_miner_current_rejections(rejections); } if block_status diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 1d372f1051..3ef3e45ccb 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -139,12 +139,12 @@ impl Counters { fn inc(_ctr: &RunLoopCounter) {} #[cfg(test)] - pub fn set(ctr: &RunLoopCounter, value: u64) { + fn set(ctr: &RunLoopCounter, value: u64) { ctr.0.store(value, Ordering::SeqCst); } #[cfg(not(test))] - pub fn set(_ctr: &RunLoopCounter, _value: u64) {} + fn set(_ctr: &RunLoopCounter, _value: u64) {} pub fn bump_blocks_processed(&self) { Counters::inc(&self.blocks_processed); @@ -217,6 +217,14 @@ impl Counters { pub fn set_microblocks_processed(&self, value: u64) { Counters::set(&self.microblocks_processed, value) } + + pub fn set_miner_current_rejections_timeout(&self, value: u64) { + Counters::set(&self.naka_miner_current_rejections_timeout_secs, value) + } + + pub fn set_miner_current_rejections(&self, value: u64) { + Counters::set(&self.naka_miner_current_rejections, value) + } } /// Coordinating a node running in neon mode. From 8d2bb0d80db3210a44730d3252ffe0197a2e8f37 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Sat, 1 Feb 2025 10:54:30 +0100 Subject: [PATCH 35/38] refactored block_validation_check_rejection_timeout_heuristic test --- testnet/stacks-node/src/tests/signer/v0.rs | 183 ++++----------------- 1 file changed, 33 insertions(+), 150 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index e055ad82f2..26f95d9ce0 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7947,166 +7947,49 @@ fn block_validation_check_rejection_timeout_heuristic() { // note we just use mined nakamoto_blocks as the second block is not going to be confirmed - info!("------------------------- Check Rejections-based timeout with 1 rejection -------------------------"); + let mut test_rejections = |signer_split_index: usize, expected_timeout: u64| { + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + let (ignore_signers, reject_signers) = all_signers.split_at(signer_split_index); - let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + info!("------------------------- Check Rejections-based timeout with {} rejections -------------------------", reject_signers.len()); - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[19]]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..19].to_vec()); + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(reject_signers.to_vec()); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(ignore_signers.to_vec()); - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 30, - || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), - ) - .unwrap(); - - signer_test - .wait_for_block_rejections(timeout.as_secs(), &[all_signers[19]]) - .unwrap(); - - wait_for(60, || { - Ok(signer_test - .running_nodes - .counters - .naka_miner_current_rejections - .get() - >= 1) - }) - .unwrap(); - assert_eq!( - signer_test - .running_nodes - .counters - .naka_miner_current_rejections_timeout_secs - .get(), - 123 - ); - - info!("------------------------- Check Rejections-based timeout with 2 rejections -------------------------"); - - let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); - - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[18], all_signers[19]]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..18].to_vec()); - - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 30, - || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), - ) - .unwrap(); - - signer_test - .wait_for_block_rejections(timeout.as_secs(), &[all_signers[18], all_signers[19]]) - .unwrap(); - - wait_for(60, || { - Ok(signer_test - .running_nodes - .counters - .naka_miner_current_rejections - .get() - >= 2) - }) - .unwrap(); - assert_eq!( - signer_test - .running_nodes - .counters - .naka_miner_current_rejections_timeout_secs - .get(), - 20 - ); - - info!("------------------------- Check Rejections-based timeout with 3 rejections -------------------------"); - - let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); - - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![all_signers[17], all_signers[18], all_signers[19]]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..17].to_vec()); - - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 30, - || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), - ) - .unwrap(); - - signer_test - .wait_for_block_rejections( - timeout.as_secs(), - &[all_signers[17], all_signers[18], all_signers[19]], + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), ) .unwrap(); - wait_for(60, || { - Ok(signer_test - .running_nodes - .counters - .naka_miner_current_rejections - .get() - >= 3) - }) - .unwrap(); - - assert_eq!( signer_test - .running_nodes - .counters - .naka_miner_current_rejections_timeout_secs - .get(), - 10 - ); - - info!("------------------------- Check Rejections-based timeout with 4 rejections -------------------------"); - - let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); - - TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![ - all_signers[16], - all_signers[17], - all_signers[18], - all_signers[19], - ]); - TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(all_signers[0..16].to_vec()); - - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 30, - || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), - ) - .unwrap(); + .wait_for_block_rejections(timeout.as_secs(), &reject_signers) + .unwrap(); - signer_test - .wait_for_block_rejections( - timeout.as_secs(), - &[ - all_signers[16], - all_signers[17], - all_signers[18], - all_signers[19], - ], - ) + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= reject_signers.len() as u64) + }) .unwrap(); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + expected_timeout + ); + }; - wait_for(60, || { - Ok(signer_test - .running_nodes - .counters - .naka_miner_current_rejections - .get() - >= 4) - }) - .unwrap(); - assert_eq!( - signer_test - .running_nodes - .counters - .naka_miner_current_rejections_timeout_secs - .get(), - 99 - ); + test_rejections(19, 123); + test_rejections(18, 20); + test_rejections(17, 10); + test_rejections(16, 99); // reset reject/ignore TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); From 151c844734122174e75a0f801d2d0f28c4085a08 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Sat, 1 Feb 2025 11:00:18 +0100 Subject: [PATCH 36/38] use From based cast for u32->u64 --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 97b22ef68e..ff08633ac6 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -334,7 +334,7 @@ impl SignerCoordinator { return false; } // number or rejections changed? - if status.total_reject_weight as u64 != rejections { + if u64::from(status.total_reject_weight) != rejections { return false; } // enough signatures? @@ -388,8 +388,8 @@ impl SignerCoordinator { } }; - if rejections != block_status.total_reject_weight as u64 { - rejections = block_status.total_reject_weight as u64; + if rejections != u64::from(block_status.total_reject_weight) { + rejections = u64::from(block_status.total_reject_weight); let (rejections_step, new_rejections_timeout) = self .block_rejection_timeout_steps .range((Included(0), Included(rejections))) From f66c8944082a0040b3bb399a9f676a9f07497dff Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Sat, 1 Feb 2025 11:08:20 +0100 Subject: [PATCH 37/38] removed useless line --- testnet/stacks-node/src/tests/signer/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testnet/stacks-node/src/tests/signer/mod.rs b/testnet/stacks-node/src/tests/signer/mod.rs index 5f41ff816a..6b355fe5aa 100644 --- a/testnet/stacks-node/src/tests/signer/mod.rs +++ b/testnet/stacks-node/src/tests/signer/mod.rs @@ -936,7 +936,6 @@ fn setup_stx_btc_node( let counters = run_loop.counters(); let coord_channel = run_loop.coordinator_channels(); - let run_loop_thread = thread::spawn(move || run_loop.start(None, 0)); // Give the run loop some time to start up! From 400c806aaa812d4565b94d57be60527040fb31e0 Mon Sep 17 00:00:00 2001 From: Roberto De Ioris Date: Mon, 3 Feb 2025 08:26:54 +0100 Subject: [PATCH 38/38] use u32 for rejections counter --- .../src/nakamoto_node/signer_coordinator.rs | 16 ++++++++-------- testnet/stacks-node/src/run_loop/neon.rs | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index ff08633ac6..2138b7e767 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -70,7 +70,7 @@ pub struct SignerCoordinator { /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, /// The timeout configuration based on the percentage of rejections - block_rejection_timeout_steps: BTreeMap, + block_rejection_timeout_steps: BTreeMap, } impl SignerCoordinator { @@ -107,10 +107,10 @@ impl SignerCoordinator { let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); // build a BTreeMap of the various timeout steps - let mut block_rejection_timeout_steps = BTreeMap::::new(); + let mut block_rejection_timeout_steps = BTreeMap::::new(); for (percentage, duration) in config.miner.block_rejection_timeout_steps.iter() { let rejections_amount = - ((f64::from(listener.total_weight) / 100.0) * f64::from(*percentage)) as u64; + ((f64::from(listener.total_weight) / 100.0) * f64::from(*percentage)) as u32; block_rejection_timeout_steps.insert(rejections_amount, *duration); } @@ -308,7 +308,7 @@ impl SignerCoordinator { counters: &Counters, ) -> Result, NakamotoNodeError> { // the amount of current rejections (used to eventually modify the timeout) - let mut rejections: u64 = 0; + let mut rejections: u32 = 0; // default timeout (the 0 entry must be always present) let mut rejections_timeout = self .block_rejection_timeout_steps @@ -334,7 +334,7 @@ impl SignerCoordinator { return false; } // number or rejections changed? - if u64::from(status.total_reject_weight) != rejections { + if status.total_reject_weight != rejections { return false; } // enough signatures? @@ -388,8 +388,8 @@ impl SignerCoordinator { } }; - if rejections != u64::from(block_status.total_reject_weight) { - rejections = u64::from(block_status.total_reject_weight); + if rejections != block_status.total_reject_weight { + rejections = block_status.total_reject_weight; let (rejections_step, new_rejections_timeout) = self .block_rejection_timeout_steps .range((Included(0), Included(rejections))) @@ -406,7 +406,7 @@ impl SignerCoordinator { "rejections_step" => rejections_step, "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); - counters.set_miner_current_rejections_timeout(rejections_timeout.as_secs()); + counters.set_miner_current_rejections_timeout_secs(rejections_timeout.as_secs()); counters.set_miner_current_rejections(rejections); } diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 3ef3e45ccb..299335f35f 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -218,12 +218,12 @@ impl Counters { Counters::set(&self.microblocks_processed, value) } - pub fn set_miner_current_rejections_timeout(&self, value: u64) { + pub fn set_miner_current_rejections_timeout_secs(&self, value: u64) { Counters::set(&self.naka_miner_current_rejections_timeout_secs, value) } - pub fn set_miner_current_rejections(&self, value: u64) { - Counters::set(&self.naka_miner_current_rejections, value) + pub fn set_miner_current_rejections(&self, value: u32) { + Counters::set(&self.naka_miner_current_rejections, u64::from(value)) } }