Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: improve chain rewinding mechanism #29196

Merged
merged 12 commits into from
Mar 13, 2024
225 changes: 170 additions & 55 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,159 @@ func (bc *BlockChain) SetSafe(header *types.Header) {
}
}

// rewindPathHead implements the logic of rewindHead in the context of hash scheme.
func (bc *BlockChain) rewindHashHead(root common.Hash) (*types.Header, uint64) {
var (
limit uint64 // The oldest block that will be searched for this rewinding
beyondRoot = root == common.Hash{} // Flag whether we're beyond the requested root (no root, always true)
pivot = rawdb.ReadLastPivotNumber(bc.db) // Associated block number of pivot point state
head = bc.CurrentBlock() // Head block of the chain
rootNumber uint64 // Associated block number of requested root

start = time.Now() // Timestamp the rewinding is restarted
logged = time.Now() // Timestamp last progress log was printed
)
// The oldest block to be searched is determined by the pivot block or a constant
// searching threshold. The rationale behind this is as follows:
//
// - Snap sync is selected if the pivot block is available. The earliest available
// state is the pivot block itself, so there is no sense in going further back.
//
// - Full sync is selected if the pivot block does not exist. The hash database
// periodically flushes the state to disk, and the used searching threshold is
// considered sufficient to find a persistent state, even for the testnet. It
// might be not enough for a chain that is nearly empty. In the worst case,
// the entire chain is reset to genesis, and snap sync is re-enabled on top,
// which is still acceptable.
if pivot != nil {
limit = *pivot
} else if head.Number.Uint64() > params.FullImmutabilityThreshold {
limit = head.Number.Uint64() - params.FullImmutabilityThreshold
}
for {
logger := log.Trace
if time.Since(logged) > time.Second*8 {
logged = time.Now()
logger = log.Info
}
logger("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(start)))

// If a root threshold was requested but not yet crossed, check
if !beyondRoot && head.Root == root {
beyondRoot, rootNumber = true, head.Number.Uint64()
}
// If the associated state is not reachable, continue searching
// backwards until an available state is found.
if !bc.HasState(head.Root) {
// If search limit is reached, return the genesis block as
// the new chain head.
if head.Number.Uint64() < limit {
log.Info("Rewinding limit reached, resetting to genesis", "number", head.Number, "hash", head.Hash(), "limit", limit)
return bc.genesisBlock.Header(), rootNumber
}
// If the chain is gapped in the middle, return the genesis
// block as the new chain head.
parent := bc.GetHeader(head.ParentHash, head.Number.Uint64()-1)
if parent == nil {
log.Error("Missing block in the middle, resetting to genesis", "number", head.Number.Uint64()-1, "hash", head.ParentHash)
return bc.genesisBlock.Header(), rootNumber
}
head = parent

// If the genesis block is reached, stop searching.
if head.Number.Uint64() == 0 {
log.Info("Genesis block reached", "number", head.Number, "hash", head.Hash())
return head, rootNumber
}
continue // keep rewinding
}
// Once the available state is found, ensure that the requested root has already
// been crossed. If not, continue rewinding.
if beyondRoot || head.Number.Uint64() == 0 {
log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash())
return head, rootNumber
}
log.Debug("Skipping block with threshold state", "number", head.Number, "hash", head.Hash(), "root", head.Root)
head = bc.GetHeader(head.ParentHash, head.Number.Uint64()-1) // Keep rewinding
}
}

// rewindPathHead implements the logic of rewindHead in the context of path scheme.
func (bc *BlockChain) rewindPathHead(root common.Hash) (*types.Header, uint64) {
var (
head = bc.CurrentBlock() // Head block of the chain
pivot = rawdb.ReadLastPivotNumber(bc.db) // Associated block number of pivot block
rootNumber uint64 // Associated block number of requested root

// BeyondRoot represents whether the requested root is already
// crossed. The flag value is set to true if the root is empty.
beyondRoot = root == common.Hash{}

// noState represents if the target state requested for search
// is unavailable and impossible to be recovered.
noState = !bc.HasState(root) && !bc.stateRecoverable(root)

start = time.Now() // Timestamp the rewinding is restarted
logged = time.Now() // Timestamp last progress log was printed
)
// Rewind the head block tag until an available state is found.
for {
logger := log.Trace
if time.Since(logged) > time.Second*8 {
logged = time.Now()
logger = log.Info
}
logger("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(start)))

// If a root threshold was requested but not yet crossed, check
if !beyondRoot && head.Root == root {
beyondRoot, rootNumber = true, head.Number.Uint64()
}
// If the root threshold hasn't been crossed but the available
// state is reached, quickly determine if the target state is
// possible to be reached or not.
if !beyondRoot && noState && bc.HasState(head.Root) {
beyondRoot = true
log.Info("Disable the search for unattainable state", "root", root)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this case the one where the snapshot persistent layer is below 90K? In the current implementation, won't we end up just having a broken snapshot layer that's always stale and the chain moving forward with the trie only? Shouldn't we trigger some snapshot-fixing in this case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this case the one where the snapshot persistent layer is below 90K?

True, the snapshot state is impossible to recover.

In this case, we will not search for the snapshot state, and return the block as the head with "first seen state".

won't we end up just having a broken snapshot layer that's always stale and the chain moving forward with the trie only?

We don't. As in this case, rootNumber is returned as 0, the snapshot recovery mechanism is disabled.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But in that case, won't we simply leave the snapshot layer forever dangling in that old state?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, in case of a head mismatch, the snapshot gets invalidated and auto-regenerated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exactly

}
// Check if the associated state is available or recoverable if
// the requested root has already been crossed.
if beyondRoot && (bc.HasState(head.Root) || bc.stateRecoverable(head.Root)) {
break
}
// If pivot block is reached, return the genesis block as the
// new chain head. Theoretically there must be a persistent
// state before or at the pivot block, prevent endless rewinding
// towards the genesis just in case.
if pivot != nil && *pivot >= head.Number.Uint64() {
log.Info("Pivot block reached, resetting to genesis", "number", head.Number, "hash", head.Hash())
return bc.genesisBlock.Header(), rootNumber
}
head = bc.GetHeader(head.ParentHash, head.Number.Uint64()-1) // Keep rewinding
}
// Recover if the target state if it's not available yet.
if !bc.HasState(head.Root) {
if err := bc.triedb.Recover(head.Root); err != nil {
log.Crit("Failed to rollback state", "err", err)
}
}
log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash())
return head, rootNumber
}

// rewindHead searches the available states in the database and returns the associated
// block as the new head block.
//
// If the given root is not empty, then the rewind should attempt to pass the specified
// state root and return the associated block number as well. If the root is deemed
// impossible to pass, 0 is returned.
rjl493456442 marked this conversation as resolved.
Show resolved Hide resolved
func (bc *BlockChain) rewindHead(root common.Hash) (*types.Header, uint64) {
if bc.triedb.Scheme() == rawdb.PathScheme {
return bc.rewindPathHead(root)
}
return bc.rewindHashHead(root)
}

// setHeadBeyondRoot rewinds the local chain to a new head with the extra condition
// that the rewind must pass the specified state root. This method is meant to be
// used when rewinding with snapshots enabled to ensure that we go back further than
Expand All @@ -634,79 +787,40 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha
}
defer bc.chainmu.Unlock()

// Track the block number of the requested root hash
var rootNumber uint64 // (no root == always 0)

// Retrieve the last pivot block to short circuit rollbacks beyond it and the
// current freezer limit to start nuking id underflown
pivot := rawdb.ReadLastPivotNumber(bc.db)
frozen, _ := bc.db.Ancients()
var (
// Track the block number of the requested root hash
rootNumber uint64 // (no root == always 0)

// Retrieve the last pivot block to short circuit rollbacks beyond it
// and the current freezer limit to start nuking it's underflown.
pivot = rawdb.ReadLastPivotNumber(bc.db)
)
updateFn := func(db ethdb.KeyValueWriter, header *types.Header) (*types.Header, bool) {
// Rewind the blockchain, ensuring we don't end up with a stateless head
// block. Note, depth equality is permitted to allow using SetHead as a
// chain reparation mechanism without deleting any data!
if currentBlock := bc.CurrentBlock(); currentBlock != nil && header.Number.Uint64() <= currentBlock.Number.Uint64() {
newHeadBlock := bc.GetBlock(header.Hash(), header.Number.Uint64())
if newHeadBlock == nil {
log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash())
newHeadBlock = bc.genesisBlock
} else {
// Block exists. Keep rewinding until either we find one with state
// or until we exceed the optional threshold root hash
beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true)

for {
// If a root threshold was requested but not yet crossed, check
if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root {
beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
}
if !bc.HasState(newHeadBlock.Root()) && !bc.stateRecoverable(newHeadBlock.Root()) {
log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
if pivot == nil || newHeadBlock.NumberU64() > *pivot {
parent := bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1)
if parent != nil {
newHeadBlock = parent
continue
}
log.Error("Missing block in the middle, aiming genesis", "number", newHeadBlock.NumberU64()-1, "hash", newHeadBlock.ParentHash())
newHeadBlock = bc.genesisBlock
} else {
log.Trace("Rewind passed pivot, aiming genesis", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "pivot", *pivot)
newHeadBlock = bc.genesisBlock
}
}
if beyondRoot || newHeadBlock.NumberU64() == 0 {
if !bc.HasState(newHeadBlock.Root()) && bc.stateRecoverable(newHeadBlock.Root()) {
// Rewind to a block with recoverable state. If the state is
// missing, run the state recovery here.
if err := bc.triedb.Recover(newHeadBlock.Root()); err != nil {
log.Crit("Failed to rollback state", "err", err) // Shouldn't happen
}
log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
}
break
}
log.Debug("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root())
newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding
}
}
var newHeadBlock *types.Header
newHeadBlock, rootNumber = bc.rewindHead(root)
rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash())

// Degrade the chain markers if they are explicitly reverted.
// In theory we should update all in-memory markers in the
// last step, however the direction of SetHead is from high
// to low, so it's safe to update in-memory markers directly.
bc.currentBlock.Store(newHeadBlock.Header())
headBlockGauge.Update(int64(newHeadBlock.NumberU64()))
bc.currentBlock.Store(newHeadBlock)
headBlockGauge.Update(int64(newHeadBlock.Number.Uint64()))

// The head state is missing, which is only possible in the path-based
// scheme. This situation occurs when the chain head is rewound below
// the pivot point. In this scenario, there is no possible recovery
// approach except for rerunning a snap sync. Do nothing here until the
// state syncer picks it up.
if !bc.HasState(newHeadBlock.Root()) {
log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number(), "hash", newHeadBlock.Hash())
if !bc.HasState(newHeadBlock.Root) {
if newHeadBlock.Number.Uint64() != 0 {
log.Crit("Chain is stateless at a non-genesis block")
}
log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number, "hash", newHeadBlock.Hash())
}
}
// Rewind the snap block in a simpleton way to the target head
Expand All @@ -733,6 +847,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha
// intent afterwards is full block importing, delete the chain segment
// between the stateful-block and the sethead target.
var wipe bool
frozen, _ := bc.db.Ancients()
if headNumber+1 < frozen {
wipe = pivot == nil || headNumber >= *pivot
}
Expand Down
Loading