This repository has been archived by the owner on Jan 22, 2025. It is now read-only.

Fixup flaky tests #21617

Merged (2 commits) on Dec 6, 2021
83 changes: 60 additions & 23 deletions local-cluster/src/local_cluster.rs
@@ -281,7 +281,7 @@ impl LocalCluster {
let mut listener_config = safe_clone_config(&config.validator_configs[0]);
listener_config.voting_disabled = true;
(0..config.num_listeners).for_each(|_| {
cluster.add_validator(
cluster.add_validator_listener(
&listener_config,
0,
Arc::new(Keypair::new()),
@@ -324,11 +324,50 @@ impl LocalCluster {
}
}

/// Set up validator without voting or staking accounts
pub fn add_validator_listener(
&mut self,
validator_config: &ValidatorConfig,
stake: u64,
validator_keypair: Arc<Keypair>,
voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace,
) -> Pubkey {
self.do_add_validator(
validator_config,
true,
stake,
validator_keypair,
voting_keypair,
socket_addr_space,
)
}

/// Set up validator with voting and staking accounts
pub fn add_validator(
&mut self,
validator_config: &ValidatorConfig,
stake: u64,
validator_keypair: Arc<Keypair>,
voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace,
) -> Pubkey {
self.do_add_validator(
validator_config,
false,
stake,
validator_keypair,
voting_keypair,
socket_addr_space,
)
}

fn do_add_validator(
&mut self,
validator_config: &ValidatorConfig,
is_listener: bool,
stake: u64,
validator_keypair: Arc<Keypair>,
mut voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace,
) -> Pubkey {
@@ -347,30 +386,28 @@ impl LocalCluster {
let contact_info = validator_node.info.clone();
let (ledger_path, _blockhash) = create_new_tmp_ledger!(&self.genesis_config);

if validator_config.voting_disabled {
// Give the validator some lamports to setup vote accounts
if is_listener {
// setup as a listener
info!("listener {} ", validator_pubkey,);
} else {
// Give the validator some lamports to setup vote accounts
if should_create_vote_pubkey {
let validator_balance = Self::transfer_with_client(
&client,
&self.funding_keypair,
&validator_pubkey,
stake * 2 + 2,
);
info!(
"validator {} balance {}",
validator_pubkey, validator_balance
);
Self::setup_vote_and_stake_accounts(
&client,
voting_keypair.as_ref().unwrap(),
&validator_keypair,
stake,
)
.unwrap();
}
} else if should_create_vote_pubkey {
let validator_balance = Self::transfer_with_client(
&client,
&self.funding_keypair,
&validator_pubkey,
stake * 2 + 2,
);
info!(
"validator {} balance {}",
validator_pubkey, validator_balance
);
Self::setup_vote_and_stake_accounts(
&client,
voting_keypair.as_ref().unwrap(),
&validator_keypair,
stake,
)
.unwrap();
}

let mut config = safe_clone_config(validator_config);
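For reference, a minimal usage sketch contrasting the two entry points introduced above. This is not part of the diff: the config names, stake value, and keypairs are illustrative, and only the signatures shown in this file are assumed.

// Hypothetical caller code (not in this PR), contrasting the two helpers.
// A staked, voting validator: do_add_validator funds it (stake * 2 + 2 lamports)
// and creates its vote and stake accounts.
let voting_pubkey = cluster.add_validator(
    &validator_config,
    10_000,                       // stake (illustrative)
    Arc::new(Keypair::new()),     // validator identity
    None,                         // voting keypair (assumed to be created internally when None)
    SocketAddrSpace::Unspecified,
);

// A listener: joins the cluster and replays, but never votes or stakes,
// so it cannot influence fork choice in a test. The existing caller passes 0 stake.
let listener_pubkey = cluster.add_validator_listener(
    &listener_config,
    0,
    Arc::new(Keypair::new()),
    None,
    SocketAddrSpace::Unspecified,
);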
69 changes: 54 additions & 15 deletions local-cluster/tests/local_cluster.rs
@@ -3190,13 +3190,27 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
(validators[0], validators[1], validators[2]);

// Disable voting on all validators other than validator B to ensure neither of the below two
// scenarios occur:
// 1. If the cluster immediately forks on restart while we're killing validators A and C,
// with Validator B on one side, and `A` and `C` on a heavier fork, it's possible that the lockouts
// on `A` and `C`'s latest votes do not extend past validator B's latest vote. Then validator B
// will be stuck unable to vote, but also unable to generate a switching proof to the heavier fork.
//
// 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
// because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
// below only for slots <= `next_slot_on_a`, validator A will not know how its last vote chains
// to the other forks, and may violate switching proofs on restart.
let mut validator_configs =
make_identical_validator_configs(&ValidatorConfig::default(), node_stakes.len());

validator_configs[0].voting_disabled = true;
validator_configs[2].voting_disabled = true;

let mut config = ClusterConfig {
cluster_lamports: 100_000,
node_stakes: node_stakes.clone(),
validator_configs: make_identical_validator_configs(
&ValidatorConfig::default(),
node_stakes.len(),
),
node_stakes,
validator_configs,
validator_keys: Some(validator_keys),
slots_per_epoch,
stakers_slot_offset: slots_per_epoch,
@@ -3213,9 +3227,23 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey);
let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey);

info!(
"val_a {} ledger path {:?}",
validator_a_pubkey, val_a_ledger_path
);
info!(
"val_b {} ledger path {:?}",
validator_b_pubkey, val_b_ledger_path
);
info!(
"val_c {} ledger path {:?}",
validator_c_pubkey, val_c_ledger_path
);

// Immediately kill validator A, and C
let validator_a_info = cluster.exit_node(&validator_a_pubkey);
let validator_c_info = cluster.exit_node(&validator_c_pubkey);
info!("Exiting validators A and C");
let mut validator_a_info = cluster.exit_node(&validator_a_pubkey);
let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);

// Step 1:
// Let validator B, (D) run for a while.
Expand All @@ -3224,7 +3252,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let elapsed = now.elapsed();
assert!(
elapsed <= Duration::from_secs(30),
"LocalCluster nodes failed to log enough tower votes in {} secs",
"Validator B failed to vote on any slot >= {} in {} secs",
next_slot_on_a,
elapsed.as_secs()
);
sleep(Duration::from_millis(100));
@@ -3269,29 +3298,38 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
}

// Step 3:
// Restart A so that it can vote for the slots in B's fork
// Restart A with voting enabled so that it can vote on B's fork
// up to `next_slot_on_a`, thereby optimistically confirming `next_slot_on_a`
info!("Restarting A");
validator_a_info.config.voting_disabled = false;
cluster.restart_node(
&validator_a_pubkey,
validator_a_info,
SocketAddrSpace::Unspecified,
);

info!("Waiting for A to vote");
let mut last_print = Instant::now();
info!("Waiting for A to vote on slot descended from slot `next_slot_on_a`");
let now = Instant::now();
loop {
if let Some((last_vote_slot, _)) =
last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
{
if last_vote_slot >= next_slot_on_a {
info!("Validator A has caught up: {}", last_vote_slot);
info!(
"Validator A has caught up and voted on slot: {}",
last_vote_slot
);
break;
} else if last_print.elapsed().as_secs() >= 10 {
info!("Validator A latest vote: {}", last_vote_slot);
last_print = Instant::now();
}
}

if now.elapsed().as_secs() >= 30 {
panic!(
"Validator A has not seen optimistic confirmation slot > {} in 30 seconds",
next_slot_on_a
);
}

sleep(Duration::from_millis(20));
}

@@ -3319,6 +3357,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
// Step 4:
// Run validator C only to make it produce and vote on its own fork.
info!("Restart validator C again!!!");
validator_c_info.config.voting_disabled = false;
cluster.restart_node(
&validator_c_pubkey,
validator_c_info,
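The test-side changes above repeat one pattern: boot a node with voting disabled, take it offline, then flip the flag back before restarting it. A condensed sketch of that pattern, assuming only the cluster methods that appear in this diff (the node name is illustrative):

// Hypothetical condensation of the deflaking pattern used in this test.
// The node was booted with voting_disabled = true, so it cannot vote
// before the test has arranged the ledger/fork state it needs.
let mut node_info = cluster.exit_node(&node_pubkey);

// Re-enable voting on the saved config, then bring the node back;
// it only begins voting once restarted.
node_info.config.voting_disabled = false;
cluster.restart_node(&node_pubkey, node_info, SocketAddrSpace::Unspecified);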