From f1eb60bfaee81703181b3985cb289e983a5195af Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 15 Feb 2024 09:51:19 -0800 Subject: [PATCH] FatalVMError shouldn't create "Delayed materialization code invariant" --- aptos-move/block-executor/src/errors.rs | 14 +++++++---- aptos-move/block-executor/src/executor.rs | 25 ++++++++++++------- .../src/txn_last_input_output.rs | 12 ++++++--- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/aptos-move/block-executor/src/errors.rs b/aptos-move/block-executor/src/errors.rs index 97f9a6db335af..ec1c35724b61e 100644 --- a/aptos-move/block-executor/src/errors.rs +++ b/aptos-move/block-executor/src/errors.rs @@ -4,12 +4,16 @@ use aptos_types::delayed_fields::PanicError; -// The same module access path for module was both read & written during speculative executions. -// This may trigger a race due to the Move-VM loader cache implementation, and mitigation requires -// aborting the parallel execution pipeline and falling back to the sequential execution. -// TODO: provide proper multi-versioning for code (like data) for the cache. #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ModulePathReadWriteError; +pub(crate) enum ParallelBlockExecutionError { + // The same module access path for module was both read & written during speculative executions. + // This may trigger a race due to the Move-VM loader cache implementation, and mitigation requires + // aborting the parallel execution pipeline and falling back to the sequential execution. + // TODO: provide proper multi-versioning for code (like data) for the cache. + ModulePathReadWriteError, + /// unrecoverable VM error + FatalVMError, +} // This is separate error because we need to match the error variant to provide a specialized // fallback logic if a resource group serialization error occurs. diff --git a/aptos-move/block-executor/src/executor.rs b/aptos-move/block-executor/src/executor.rs index eb4039fbae12e..39820a09460b4 100644 --- a/aptos-move/block-executor/src/executor.rs +++ b/aptos-move/block-executor/src/executor.rs @@ -25,7 +25,7 @@ use aptos_aggregator::{ types::{code_invariant_error, expect_ok, PanicOr}, }; use aptos_drop_helper::DEFAULT_DROPPER; -use aptos_logger::{debug, error}; +use aptos_logger::{debug, error, info}; use aptos_mvhashmap::{ types::{Incarnation, MVDelayedFieldsError, TxnIndex, ValueWithLayout}, unsync_map::UnsyncMap, @@ -106,7 +106,7 @@ where executor: &E, base_view: &S, latest_view: ParallelState, - ) -> Result> { + ) -> Result> { let _timer = TASK_EXECUTE_SECONDS.start_timer(); let txn = &signature_verified_block[idx_to_execute as usize]; @@ -293,7 +293,9 @@ where // Module R/W is an expected fallback behavior, no alert is required. debug!("[Execution] At txn {}, Module read & write", idx_to_execute); - return Err(PanicOr::Or(ModulePathReadWriteError)); + return Err(PanicOr::Or( + ParallelBlockExecutionError::ModulePathReadWriteError, + )); } Ok(updates_outside) } @@ -438,7 +440,7 @@ where shared_counter: &AtomicU32, executor: &E, block: &[T], - ) -> Result<(), PanicOr> { + ) -> Result<(), PanicOr> { let mut block_limit_processor = shared_commit_state.acquire(); while let Some((txn_idx, incarnation)) = scheduler.try_commit() { @@ -486,7 +488,9 @@ where scheduler.add_to_commit_queue(txn_idx); } - last_input_output.check_fatal_vm_error(txn_idx)?; + last_input_output + .check_fatal_vm_error(txn_idx) + .map_err(PanicOr::Or)?; // Handle a potential vm error, then check invariants on the recorded outputs. last_input_output.check_execution_status_during_commit(txn_idx)?; @@ -729,7 +733,7 @@ where shared_counter: &AtomicU32, shared_commit_state: &ExplicitSyncWrapper>, final_results: &ExplicitSyncWrapper>, - ) -> Result<(), PanicOr> { + ) -> Result<(), PanicOr> { // Make executor for each task. TODO: fast concurrent executor. let init_timer = VM_INIT_SECONDS.start_timer(); let executor = E::init(*executor_arguments); @@ -891,7 +895,7 @@ where &final_results, ) { // If there are multiple errors, they all get logged: - // ModulePathReadWriteError variant is logged at construction, + // ModulePathReadWriteError and FatalVMErrorvariant is logged at construction, // and below we log CodeInvariantErrors. if let PanicOr::CodeInvariantError(err_msg) = err { alert!("[BlockSTM] worker loop: CodeInvariantError({:?})", err_msg); @@ -1027,7 +1031,10 @@ where if let Some(commit_hook) = &self.transaction_commit_hook { commit_hook.on_execution_aborted(idx as TxnIndex); } - alert!("Fatal VM error by transaction {}", idx as TxnIndex); + error!( + "Sequential execution FatalVMError by transaction {}", + idx as TxnIndex + ); // Record the status indicating the unrecoverable VM failure. return Err(SequentialBlockExecutionError::ErrorToReturn( BlockExecutionError::FatalVMError(err), @@ -1296,7 +1303,7 @@ where // Clear by re-initializing the speculative logs. init_speculative_logs(signature_verified_block.len()); - debug!("parallel execution requiring fallback"); + info!("parallel execution requiring fallback"); } // If we didn't run parallel or it didn't finish successfully - run sequential diff --git a/aptos-move/block-executor/src/txn_last_input_output.rs b/aptos-move/block-executor/src/txn_last_input_output.rs index 0c2bbb9be605e..aa264ca4e374b 100644 --- a/aptos-move/block-executor/src/txn_last_input_output.rs +++ b/aptos-move/block-executor/src/txn_last_input_output.rs @@ -3,11 +3,13 @@ use crate::{ captured_reads::CapturedReads, + errors::ParallelBlockExecutionError, explicit_sync_wrapper::ExplicitSyncWrapper, task::{ExecutionStatus, TransactionOutput}, types::{InputOutputKey, ReadWriteSummary}, }; use aptos_aggregator::types::code_invariant_error; +use aptos_logger::error; use aptos_mvhashmap::types::{TxnIndex, ValueWithLayout}; use aptos_types::{ delayed_fields::PanicError, fee_statement::FeeStatement, @@ -204,13 +206,17 @@ impl, E: Debug + Send + Clone> ) } - pub(crate) fn check_fatal_vm_error(&self, txn_idx: TxnIndex) -> Result<(), PanicError> { + pub(crate) fn check_fatal_vm_error( + &self, + txn_idx: TxnIndex, + ) -> Result<(), ParallelBlockExecutionError> { if let Some(status) = self.outputs[txn_idx as usize].load_full() { if let ExecutionStatus::Abort(err) = status.as_ref() { - return Err(code_invariant_error(format!( + error!( "FatalVMError from parallel execution {:?} at txn {}", err, txn_idx - ))); + ); + return Err(ParallelBlockExecutionError::FatalVMError); } } Ok(())