From 4b812097e1d9a87b35a6b8bae2319f9f6f8614c9 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 17:23:54 +0700 Subject: [PATCH] chore: panic to restart drive --- packages/rs-drive-abci/src/abci/error.rs | 9 --------- .../src/abci/handler/finalize_block.rs | 13 +++++++++++-- .../rs-drive-abci/src/abci/handler/info.rs | 19 +++++++++++++------ .../src/abci/handler/prepare_proposal.rs | 19 +++++++++++++------ .../src/abci/handler/process_proposal.rs | 19 +++++++++++++------ 5 files changed, 50 insertions(+), 29 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/error.rs b/packages/rs-drive-abci/src/abci/error.rs index 5e4a123140..857321a16e 100644 --- a/packages/rs-drive-abci/src/abci/error.rs +++ b/packages/rs-drive-abci/src/abci/error.rs @@ -90,13 +90,4 @@ pub enum AbciError { /// Generic with code should only be used in tests #[error("invalid state transition error: {0}")] InvalidStateTransition(#[from] ConsensusError), - - /// Drive storage root hash is not matching with app hash stored in PlatformState - #[error("drive and platform state app hash mismatch")] - AppHashMismatch { - /// Storage root hash - drive_storage_root_hash: [u8; 32], - /// App hash stored in PlatformState - platform_state_app_hash: [u8; 32], - }, } diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index ef13fe2267..3fb92640d9 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,10 +67,19 @@ where )); } - // TODO: document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we have to skip + // commit of this block from now on. 
// TODO: verify that chain id is evo1 if !(app.platform().config.network == Network::Dash && block_height == 32326) { - app.commit_transaction(platform_version)?; + // This is a simplified solution until we have a better way to handle this failure. + // We still have caches in memory that correspond to the data that + // we weren't able to commit. The solution is to restart the Drive, so all caches + // will be restored from the disk and it will try to process this block again + app.commit_transaction(platform_version) + .expect("commit transaction"); } app.platform() diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 12468cecf9..5aa6cdb130 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -42,16 +42,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && last_block_height == 32326) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // by reloading state from the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } } diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 777d2b9c0f..d630efdb1c 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -54,16 +54,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && request.height == 32327) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // by reloading state from the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } } diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index f2cb4d9e61..acd137b241 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -198,16 +198,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && request.height == 32327) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. + // Better to restart the Drive, so we might self-heal the node + // by reloading state from the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } }