Skip to content

Commit

Permalink
FatalVMError shouldn't create "Delayed materialization code invariant"
Browse files Browse the repository at this point in the history
  • Loading branch information
igor-aptos committed Feb 16, 2024
1 parent 73fd9d0 commit 6c5cd6d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 17 deletions.
14 changes: 9 additions & 5 deletions aptos-move/block-executor/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@

use aptos_types::delayed_fields::PanicError;

// The same module access path for module was both read & written during speculative executions.
// This may trigger a race due to the Move-VM loader cache implementation, and mitigation requires
// aborting the parallel execution pipeline and falling back to the sequential execution.
// TODO: provide proper multi-versioning for code (like data) for the cache.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ModulePathReadWriteError;
pub(crate) enum ParallelBlockExecutionError {
// The same module access path for module was both read & written during speculative executions.
// This may trigger a race due to the Move-VM loader cache implementation, and mitigation requires
// aborting the parallel execution pipeline and falling back to the sequential execution.
// TODO: provide proper multi-versioning for code (like data) for the cache.
ModulePathReadWriteError,
/// unrecoverable VM error
FatalVMError,
}

// This is separate error because we need to match the error variant to provide a specialized
// fallback logic if a resource group serialization error occurs.
Expand Down
25 changes: 16 additions & 9 deletions aptos-move/block-executor/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use aptos_aggregator::{
types::{code_invariant_error, expect_ok, PanicOr},
};
use aptos_drop_helper::DEFAULT_DROPPER;
use aptos_logger::{debug, error};
use aptos_logger::{debug, error, info};
use aptos_mvhashmap::{
types::{Incarnation, MVDelayedFieldsError, TxnIndex, ValueWithLayout},
unsync_map::UnsyncMap,
Expand Down Expand Up @@ -106,7 +106,7 @@ where
executor: &E,
base_view: &S,
latest_view: ParallelState<T, X>,
) -> Result<bool, PanicOr<ModulePathReadWriteError>> {
) -> Result<bool, PanicOr<ParallelBlockExecutionError>> {
let _timer = TASK_EXECUTE_SECONDS.start_timer();
let txn = &signature_verified_block[idx_to_execute as usize];

Expand Down Expand Up @@ -293,7 +293,9 @@ where
// Module R/W is an expected fallback behavior, no alert is required.
debug!("[Execution] At txn {}, Module read & write", idx_to_execute);

return Err(PanicOr::Or(ModulePathReadWriteError));
return Err(PanicOr::Or(
ParallelBlockExecutionError::ModulePathReadWriteError,
));
}
Ok(updates_outside)
}
Expand Down Expand Up @@ -438,7 +440,7 @@ where
shared_counter: &AtomicU32,
executor: &E,
block: &[T],
) -> Result<(), PanicOr<ModulePathReadWriteError>> {
) -> Result<(), PanicOr<ParallelBlockExecutionError>> {
let mut block_limit_processor = shared_commit_state.acquire();

while let Some((txn_idx, incarnation)) = scheduler.try_commit() {
Expand Down Expand Up @@ -486,7 +488,9 @@ where
scheduler.add_to_commit_queue(txn_idx);
}

last_input_output.check_fatal_vm_error(txn_idx)?;
last_input_output
.check_fatal_vm_error(txn_idx)
.map_err(PanicOr::Or)?;
// Handle a potential vm error, then check invariants on the recorded outputs.
last_input_output.check_execution_status_during_commit(txn_idx)?;

Expand Down Expand Up @@ -729,7 +733,7 @@ where
shared_counter: &AtomicU32,
shared_commit_state: &ExplicitSyncWrapper<BlockGasLimitProcessor<T>>,
final_results: &ExplicitSyncWrapper<Vec<E::Output>>,
) -> Result<(), PanicOr<ModulePathReadWriteError>> {
) -> Result<(), PanicOr<ParallelBlockExecutionError>> {
// Make executor for each task. TODO: fast concurrent executor.
let init_timer = VM_INIT_SECONDS.start_timer();
let executor = E::init(*executor_arguments);
Expand Down Expand Up @@ -891,7 +895,7 @@ where
&final_results,
) {
// If there are multiple errors, they all get logged:
// ModulePathReadWriteError variant is logged at construction,
// ModulePathReadWriteError and FatalVMErrorvariant is logged at construction,
// and below we log CodeInvariantErrors.
if let PanicOr::CodeInvariantError(err_msg) = err {
alert!("[BlockSTM] worker loop: CodeInvariantError({:?})", err_msg);
Expand Down Expand Up @@ -1027,7 +1031,10 @@ where
if let Some(commit_hook) = &self.transaction_commit_hook {
commit_hook.on_execution_aborted(idx as TxnIndex);
}
alert!("Fatal VM error by transaction {}", idx as TxnIndex);
error!(
"Sequential execution FatalVMError by transaction {}",
idx as TxnIndex
);
// Record the status indicating the unrecoverable VM failure.
return Err(SequentialBlockExecutionError::ErrorToReturn(
BlockExecutionError::FatalVMError(err),
Expand Down Expand Up @@ -1296,7 +1303,7 @@ where
// Clear by re-initializing the speculative logs.
init_speculative_logs(signature_verified_block.len());

debug!("parallel execution requiring fallback");
info!("parallel execution requiring fallback");
}

// If we didn't run parallel or it didn't finish successfully - run sequential
Expand Down
12 changes: 9 additions & 3 deletions aptos-move/block-executor/src/txn_last_input_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

use crate::{
captured_reads::CapturedReads,
errors::ParallelBlockExecutionError,
explicit_sync_wrapper::ExplicitSyncWrapper,
task::{ExecutionStatus, TransactionOutput},
types::{InputOutputKey, ReadWriteSummary},
};
use aptos_aggregator::types::code_invariant_error;
use aptos_logger::error;
use aptos_mvhashmap::types::{TxnIndex, ValueWithLayout};
use aptos_types::{
delayed_fields::PanicError, fee_statement::FeeStatement,
Expand Down Expand Up @@ -204,13 +206,17 @@ impl<T: Transaction, O: TransactionOutput<Txn = T>, E: Debug + Send + Clone>
)
}

pub(crate) fn check_fatal_vm_error(&self, txn_idx: TxnIndex) -> Result<(), PanicError> {
pub(crate) fn check_fatal_vm_error(
&self,
txn_idx: TxnIndex,
) -> Result<(), ParallelBlockExecutionError> {
if let Some(status) = self.outputs[txn_idx as usize].load_full() {
if let ExecutionStatus::Abort(err) = status.as_ref() {
return Err(code_invariant_error(format!(
error!(
"FatalVMError from parallel execution {:?} at txn {}",
err, txn_idx
)));
);
return Err(ParallelBlockExecutionError::FatalVMError);
}
}
Ok(())
Expand Down

0 comments on commit 6c5cd6d

Please sign in to comment.