Skip to content

Commit

Permalink
unify bn/vc doppelganger detection (#4398)
Browse files Browse the repository at this point in the history
* fix REST liveness endpoint responding even when gossip is not enabled
* fix VC exit code on doppelganger hit
* fix activation epoch not being updated correctly on long deposit
queues
* fix activation epoch being set incorrectly when updating validator
* move most implementation logic to `validator_pool`, add tests
* ensure consistent logging between VC and BN
* add docs
  • Loading branch information
arnetheduck authored Dec 9, 2022
1 parent 9df19f6 commit 6e2a024
Show file tree
Hide file tree
Showing 22 changed files with 478 additions and 622 deletions.
18 changes: 12 additions & 6 deletions AllTests-mainnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,6 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
+ Tail block only in common OK
```
OK: 2/2 Fail: 0/2 Skip: 0/2
## Doppelganger protection test suite
```diff
+ doppelgangerCheck() test OK
```
OK: 1/1 Fail: 0/1 Skip: 0/1
## EF - SSZ generic types
```diff
Testing basic_vector inputs - invalid Skip
Expand Down Expand Up @@ -508,6 +503,17 @@ OK: 4/4 Fail: 0/4 Skip: 0/4
+ [SyncQueue] hasEndGap() test OK
```
OK: 23/23 Fail: 0/23 Skip: 0/23
## Validator pool
```diff
+ Activation after check OK
+ Doppelganger for already active validator OK
+ Doppelganger for genesis validator OK
+ Doppelganger for validator that activates in future epoch OK
+ Doppelganger for validator that activates in previous epoch OK
+ Doppelganger for validator that activates in same epoch as check OK
+ Future activation after check OK
```
OK: 7/7 Fail: 0/7 Skip: 0/7
## Zero signature sanity checks
```diff
+ SSZ serialization roundtrip of SignedBeaconBlockHeader OK
Expand Down Expand Up @@ -614,4 +620,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9

---TOTAL---
OK: 339/344 Fail: 0/344 Skip: 5/344
OK: 345/350 Fail: 0/350 Skip: 5/350
35 changes: 14 additions & 21 deletions beacon_chain/gossip_processing/eth2_processor.nim
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import
light_client_pool, sync_committee_msg_pool],
../validators/validator_pool,
../beacon_clock,
"."/[gossip_validation, block_processor, batch_validation]
"."/[gossip_validation, block_processor, batch_validation],
../nimbus_binary_common

export
results, taskpools, block_clearance, blockchain_dag, exit_pool, attestation_pool,
Expand Down Expand Up @@ -247,14 +248,20 @@ proc setupDoppelgangerDetection*(self: var Eth2Processor, slot: Slot) =
# can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
# and one should gauge the likelihood of this simultaneous launch to tune
# the epoch delay to one's perceived risk.
if self.doppelgangerDetectionEnabled:
self.doppelgangerDetection.broadcastStartEpoch = slot.epoch

# Round up to ensure that we cover the entire epoch - used by rest api also
self.doppelgangerDetection.broadcastStartEpoch =
(slot + SLOTS_PER_EPOCH - 1).epoch

if self.doppelgangerDetectionEnabled:
notice "Setting up doppelganger detection",
epoch = slot.epoch,
broadcast_epoch = self.doppelgangerDetection.broadcastStartEpoch,
nodestart_epoch = self.doppelgangerDetection.nodeLaunchSlot.epoch()

proc clearDoppelgangerProtection*(self: var Eth2Processor) =
self.doppelgangerDetection.broadcastStartEpoch = FAR_FUTURE_EPOCH

proc checkForPotentialDoppelganger(
self: var Eth2Processor, attestation: Attestation,
attesterIndices: openArray[ValidatorIndex]) =
Expand All @@ -275,28 +282,14 @@ proc checkForPotentialDoppelganger(
validator = self.validatorPool[].getValidator(validatorPubkey)

if not(isNil(validator)):
let res = validator.doppelgangerCheck(attestation.data.slot.epoch(),
broadcastStartEpoch)
if res.isOk() and not(res.get()):
warn "We believe you are currently running another instance of the " &
"same validator. We've disconnected you from the network as " &
"this presents a significant slashing risk. Possible next steps "&
"are (a) making sure you've disconnected your validator from " &
"your old machine before restarting the client; and (b) running " &
"the client again with the gossip-slashing-protection option " &
"disabled, only if you are absolutely sure this is the only " &
"instance of your validator running, and reporting the issue " &
"at https://github.com/status-im/nimbus-eth2/issues.",
if validator.triggersDoppelganger(broadcastStartEpoch):
warn "Doppelganger attestation",
validator = shortLog(validator),
start_slot = validator.startSlot,
validator_index = validatorIndex,
activation_epoch = validator.activationEpoch.get(),
activation_epoch = validator.activationEpoch,
broadcast_epoch = broadcastStartEpoch,
attestation = shortLog(attestation)
# Avoid colliding with
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes
const QuitDoppelganger = 129
quit QuitDoppelganger
quitDoppelganger()

proc processAttestation*(
self: ref Eth2Processor, src: MsgSource,
Expand Down
17 changes: 16 additions & 1 deletion beacon_chain/nimbus_beacon_node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ proc init*(T: type BeaconNode,
SlashingProtectionDB.init(
getStateField(dag.headState, genesis_validators_root),
config.validatorsDir(), SlashingDbName)
validatorPool = newClone(ValidatorPool.init(slashingProtectionDB))
validatorPool = newClone(ValidatorPool.init(
slashingProtectionDB, config.doppelgangerDetection))

keymanagerInitResult = initKeymanagerServer(config, restServer)
keymanagerHost = if keymanagerInitResult.server != nil:
Expand Down Expand Up @@ -963,6 +964,17 @@ proc updateSyncCommitteeTopics(node: BeaconNode, slot: Slot) =

node.network.updateSyncnetsMetadata(syncnets)

proc updateDoppelganger(node: BeaconNode, epoch: Epoch) =
if not node.processor[].doppelgangerDetectionEnabled:
return

# broadcastStartEpoch is set to FAR_FUTURE_EPOCH when we're not monitoring
# gossip - it is only viable to assert liveness in epochs where gossip is
# active
if epoch > node.processor[].doppelgangerDetection.broadcastStartEpoch:
for validator in node.attachedValidators[]:
validator.updateDoppelganger(epoch - 1)

proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
## Subscribe to subnets that we are providing stability for or aggregating
## and unsubscribe from the ones that are no longer relevant.
Expand Down Expand Up @@ -1049,6 +1061,8 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
headSlot = head.slot,
headDistance

node.processor[].clearDoppelgangerProtection()

let forkDigests = node.forkDigests()

discard $eip4844ImplementationMissing & "nimbus_beacon_node.nim:updateGossipStatus check EIP4844 removeMessageHandlers"
Expand Down Expand Up @@ -1076,6 +1090,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
addMessageHandlers[gossipFork](node, forkDigests[gossipFork], slot)

node.gossipState = targetGossipState
node.updateDoppelganger(slot.epoch)
node.updateAttestationSubnetHandlers(slot)
node.updateBlocksGossipStatus(slot, isBehind)
node.updateLightClientGossipStatus(slot, isBehind)
Expand Down
13 changes: 12 additions & 1 deletion beacon_chain/nimbus_binary_common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import
chronos, confutils, presto, toml_serialization, metrics,
chronicles, chronicles/helpers as chroniclesHelpers, chronicles/topics_registry,
stew/io2,
presto,

# Local modules
./spec/[helpers],
Expand Down Expand Up @@ -413,3 +412,15 @@ proc initKeymanagerServer*(
nil

KeymanagerInitResult(server: keymanagerServer, token: token)

proc quitDoppelganger*() =
# Avoid colliding with
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes
# This error code is used to permanently shut down validators
fatal "Doppelganger detection triggered! It appears a validator loaded into " &
"this process is already live on the network - the validator is at high " &
"risk of being slashed due to the same keys being used in two setups. " &
"See https://nimbus.guide/doppelganger-detection.html for more information!"

const QuitDoppelganger = 129
quit QuitDoppelganger
5 changes: 1 addition & 4 deletions beacon_chain/nimbus_signing_node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,7 @@ proc initValidators(sn: var SigningNode): bool =
let feeRecipient = default(Eth1Address)
case keystore.kind
of KeystoreKind.Local:
# Signing node is not supposed to know genesis time, so we just set
# `start_slot` to GENESIS_SLOT.
sn.attachedValidators.addLocalValidator(
keystore, feeRecipient, GENESIS_SLOT)
discard sn.attachedValidators.addLocalValidator(keystore, feeRecipient)
publicKeyIdents.add("\"0x" & keystore.pubkey.toHex() & "\"")
of KeystoreKind.Remote:
error "Signing node do not support remote validators",
Expand Down
23 changes: 16 additions & 7 deletions beacon_chain/nimbus_validator_client.nim
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ proc new*(T: type ValidatorClientRef,
graffitiBytes: config.graffiti.get(defaultGraffitiBytes()),
nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(),
gracefulExit: newAsyncEvent(),
doppelExit: newAsyncEvent(),
indicesAvailable: newAsyncEvent(),
dynamicFeeRecipientsStore: newClone(DynamicFeeRecipientsStore.init()),
sigintHandleFut: waitSignal(SIGINT),
Expand All @@ -225,7 +225,7 @@ proc new*(T: type ValidatorClientRef,
nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(),
indicesAvailable: newAsyncEvent(),
gracefulExit: newAsyncEvent(),
doppelExit: newAsyncEvent(),
dynamicFeeRecipientsStore: newClone(DynamicFeeRecipientsStore.init()),
sigintHandleFut: newFuture[void]("sigint_placeholder"),
sigtermHandleFut: newFuture[void]("sigterm_placeholder")
Expand Down Expand Up @@ -255,7 +255,8 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {.async.} =
SlashingProtectionDB.init(
vc.beaconGenesis.genesis_validators_root,
vc.config.validatorsDir(), "slashing_protection")
validatorPool = newClone(ValidatorPool.init(slashingProtectionDB))
validatorPool = newClone(ValidatorPool.init(
slashingProtectionDB, vc.config.doppelgangerDetection))

vc.attachedValidators = validatorPool

Expand Down Expand Up @@ -315,10 +316,10 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =
vc.keymanagerServer.router.installKeymanagerHandlers(vc.keymanagerHost[])
vc.keymanagerServer.start()

var exitEventFut = vc.gracefulExit.wait()
var doppelEventFut = vc.doppelExit.wait()
try:
vc.runSlotLoopFut = runSlotLoop(vc, vc.beaconClock.now(), onSlotStart)
discard await race(vc.runSlotLoopFut, exitEventFut)
discard await race(vc.runSlotLoopFut, doppelEventFut)
if not(vc.runSlotLoopFut.finished()):
notice "Received shutdown event, exiting"
except CancelledError:
Expand All @@ -329,12 +330,20 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =

await vc.shutdownMetrics()
vc.shutdownSlashingProtection()

if doppelEventFut.finished:
# Critically, database has been shut down - the rest doesn't matter, we need
# to stop as soon as possible
# TODO we need to actually quit _before_ any other async tasks have had the
# chance to happen
quitDoppelganger()

debug "Stopping main processing loop"
var pending: seq[Future[void]]
if not(vc.runSlotLoopFut.finished()):
pending.add(vc.runSlotLoopFut.cancelAndWait())
if not(exitEventFut.finished()):
pending.add(exitEventFut.cancelAndWait())
if not(doppelEventFut.finished()):
pending.add(doppelEventFut.cancelAndWait())
debug "Stopping running services"
pending.add(vc.fallbackService.stop())
pending.add(vc.forkService.stop())
Expand Down
19 changes: 5 additions & 14 deletions beacon_chain/rpc/rest_key_management_api.nim
Original file line number Diff line number Diff line change
Expand Up @@ -127,22 +127,13 @@ proc handleAddRemoteValidatorReq(host: KeymanagerHost,
let res = importKeystore(host.validatorPool[], host.validatorsDir, keystore)
if res.isOk:
let
slot = host.getBeaconTimeFn().slotOrZero
validator = host.getValidatorData(keystore.pubkey)
data = host.getValidatorData(keystore.pubkey)
feeRecipient = host.getSuggestedFeeRecipient(keystore.pubkey).valueOr(
host.defaultFeeRecipient)
index =
if validator.isSome():
Opt.some(validator.get().index)
else:
Opt.none(ValidatorIndex)
activationEpoch =
if validator.isSome():
Opt.some(validator.get().validator.activation_epoch)
else:
Opt.none(Epoch)
host.validatorPool[].addRemoteValidator(
res.get, index, feeRecipient, slot, activationEpoch)
v = host.validatorPool[].addRemoteValidator(res.get, feeRecipient)
if data.isSome():
v.updateValidator(data.get().index, data.get().validator.activation_epoch)

RequestItemStatus(status: $KeystoreStatus.imported)
else:
case res.error().status
Expand Down
6 changes: 6 additions & 0 deletions beacon_chain/rpc/rest_validator_api.nim
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =

return RestApiResponse.response("", Http200, "text/plain")

# https://github.com/ethereum/beacon-APIs/blob/master/apis/validator/liveness.yaml
router.api(MethodPost, "/eth/v1/validator/liveness/{epoch}") do (
epoch: Epoch, contentBody: Option[ContentBody]) -> RestApiResponse:
let
Expand All @@ -878,6 +879,11 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
if (res < prevEpoch) or (res > nextEpoch):
return RestApiResponse.jsonError(Http400, InvalidEpochValueError,
"Requested epoch is more than one epoch from current epoch")

if res < node.processor[].doppelgangerDetection.broadcastStartEpoch:
# We can't accurately respond if we're not in sync and aren't
# processing gossip
return RestApiResponse.jsonError(Http503, BeaconNodeInSyncError)
res
indexList =
block:
Expand Down
45 changes: 22 additions & 23 deletions beacon_chain/validator_client/attestation_service.nim
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ type
proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
duty: DutyAndProof): Future[bool] {.async.} =
let vc = service.client
let validator = vc.getValidator(duty.data.pubkey).valueOr: return false
let validator = vc.getValidatorForDuties(duty.data.pubkey, adata.slot).valueOr:
return false
let fork = vc.forkAtEpoch(adata.slot.epoch)

doAssert(validator.index.isSome())
Expand Down Expand Up @@ -259,23 +260,25 @@ proc produceAndPublishAggregates(service: AttestationServiceRef,
block:
var res: seq[AggregateItem]
for duty in duties:
let validator = vc.attachedValidators[].getValidator(duty.data.pubkey)
if not(isNil(validator)):
if (duty.data.slot != slot) or
(duty.data.committee_index != committeeIndex):
error "Inconsistent validator duties during aggregate signing",
duty_slot = duty.data.slot, slot = slot,
duty_committee_index = duty.data.committee_index,
committee_index = committeeIndex
continue
if duty.slotSig.isSome():
let slotSignature = duty.slotSig.get()
if is_aggregator(duty.data.committee_length, slotSignature):
res.add(AggregateItem(
aggregator_index: uint64(duty.data.validator_index),
selection_proof: slotSignature,
validator: validator
))
let validator = vc.attachedValidators[].getValidatorForDuties(
duty.data.pubkey, slot).valueOr:
continue

if (duty.data.slot != slot) or
(duty.data.committee_index != committeeIndex):
error "Inconsistent validator duties during aggregate signing",
duty_slot = duty.data.slot, slot = slot,
duty_committee_index = duty.data.committee_index,
committee_index = committeeIndex
continue
if duty.slotSig.isSome():
let slotSignature = duty.slotSig.get()
if is_aggregator(duty.data.committee_length, slotSignature):
res.add(AggregateItem(
aggregator_index: uint64(duty.data.validator_index),
selection_proof: slotSignature,
validator: validator
))
res

if len(aggregateItems) > 0:
Expand Down Expand Up @@ -395,11 +398,7 @@ proc spawnAttestationTasks(service: AttestationServiceRef,

var dutiesSkipped: seq[string]
for index, duties in dutiesByCommittee:
let (protectedDuties, skipped) = vc.doppelgangerFilter(duties)
if len(skipped) > 0: dutiesSkipped.add(skipped)
if len(protectedDuties) > 0:
asyncSpawn service.publishAttestationsAndAggregates(slot, index,
protectedDuties)
asyncSpawn service.publishAttestationsAndAggregates(slot, index, duties)
if len(dutiesSkipped) > 0:
info "Doppelganger protection disabled validator duties",
validators = len(dutiesSkipped)
Expand Down
6 changes: 1 addition & 5 deletions beacon_chain/validator_client/block_service.nim
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,6 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
slot = slot
wall_slot = currentSlot

if not(vc.doppelgangerCheck(validator)):
info "Block has not been produced (doppelganger check still active)"
return

debug "Publishing block", delay = vc.getDelay(slot.block_deadline()),
genesis_root = genesisRoot,
graffiti = graffiti, fork = fork
Expand Down Expand Up @@ -316,7 +312,7 @@ proc proposeBlock(vc: ValidatorClientRef, slot: Slot,
if sres.isSome():
let
currentSlot = sres.get()
validator = vc.getValidator(proposerKey).valueOr: return
validator = vc.getValidatorForDuties(proposerKey, slot).valueOr: return
await vc.publishBlock(currentSlot, slot, validator)
except CancelledError as exc:
debug "Block proposing was interrupted", slot = slot,
Expand Down
Loading

0 comments on commit 6e2a024

Please sign in to comment.