Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unify bn/vc doppelganger detection #4398

Merged
merged 5 commits into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions AllTests-mainnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,6 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
+ Tail block only in common OK
```
OK: 2/2 Fail: 0/2 Skip: 0/2
## Doppelganger protection test suite
```diff
+ doppelgangerCheck() test OK
```
OK: 1/1 Fail: 0/1 Skip: 0/1
## EF - SSZ generic types
```diff
Testing basic_vector inputs - invalid Skip
Expand Down Expand Up @@ -508,6 +503,17 @@ OK: 4/4 Fail: 0/4 Skip: 0/4
+ [SyncQueue] hasEndGap() test OK
```
OK: 23/23 Fail: 0/23 Skip: 0/23
## Validator pool
```diff
+ Activation after check OK
+ Doppelganger for already active validator OK
+ Doppelganger for genesis validator OK
+ Doppelganger for validator that activates in future epoch OK
+ Doppelganger for validator that activates in previous epoch OK
+ Doppelganger for validator that activates in same epoch as check OK
+ Future activation after check OK
```
OK: 7/7 Fail: 0/7 Skip: 0/7
## Zero signature sanity checks
```diff
+ SSZ serialization roundtrip of SignedBeaconBlockHeader OK
Expand Down Expand Up @@ -614,4 +620,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9

---TOTAL---
OK: 339/344 Fail: 0/344 Skip: 5/344
OK: 345/350 Fail: 0/350 Skip: 5/350
35 changes: 14 additions & 21 deletions beacon_chain/gossip_processing/eth2_processor.nim
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import
light_client_pool, sync_committee_msg_pool],
../validators/validator_pool,
../beacon_clock,
"."/[gossip_validation, block_processor, batch_validation]
"."/[gossip_validation, block_processor, batch_validation],
../nimbus_binary_common

export
results, taskpools, block_clearance, blockchain_dag, exit_pool, attestation_pool,
Expand Down Expand Up @@ -247,14 +248,20 @@ proc setupDoppelgangerDetection*(self: var Eth2Processor, slot: Slot) =
# can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
# and one should gauge the likelihood of this simultaneous launch to tune
# the epoch delay to one's perceived risk.
if self.doppelgangerDetectionEnabled:
self.doppelgangerDetection.broadcastStartEpoch = slot.epoch

# Round up to ensure that we cover the entire epoch - used by rest api also
self.doppelgangerDetection.broadcastStartEpoch =
(slot + SLOTS_PER_EPOCH - 1).epoch

if self.doppelgangerDetectionEnabled:
notice "Setting up doppelganger detection",
epoch = slot.epoch,
broadcast_epoch = self.doppelgangerDetection.broadcastStartEpoch,
nodestart_epoch = self.doppelgangerDetection.nodeLaunchSlot.epoch()

proc clearDoppelgangerProtection*(self: var Eth2Processor) =
self.doppelgangerDetection.broadcastStartEpoch = FAR_FUTURE_EPOCH

proc checkForPotentialDoppelganger(
self: var Eth2Processor, attestation: Attestation,
attesterIndices: openArray[ValidatorIndex]) =
Expand All @@ -275,28 +282,14 @@ proc checkForPotentialDoppelganger(
validator = self.validatorPool[].getValidator(validatorPubkey)

if not(isNil(validator)):
let res = validator.doppelgangerCheck(attestation.data.slot.epoch(),
broadcastStartEpoch)
if res.isOk() and not(res.get()):
warn "We believe you are currently running another instance of the " &
"same validator. We've disconnected you from the network as " &
"this presents a significant slashing risk. Possible next steps "&
"are (a) making sure you've disconnected your validator from " &
"your old machine before restarting the client; and (b) running " &
"the client again with the gossip-slashing-protection option " &
"disabled, only if you are absolutely sure this is the only " &
"instance of your validator running, and reporting the issue " &
"at https://github.com/status-im/nimbus-eth2/issues.",
if validator.triggersDoppelganger(broadcastStartEpoch):
warn "Doppelganger attestation",
validator = shortLog(validator),
start_slot = validator.startSlot,
validator_index = validatorIndex,
activation_epoch = validator.activationEpoch.get(),
activation_epoch = validator.activationEpoch,
broadcast_epoch = broadcastStartEpoch,
attestation = shortLog(attestation)
# Avoid colliding with
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes
const QuitDoppelganger = 129
quit QuitDoppelganger
quitDoppelganger()

proc processAttestation*(
self: ref Eth2Processor, src: MsgSource,
Expand Down
17 changes: 16 additions & 1 deletion beacon_chain/nimbus_beacon_node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ proc init*(T: type BeaconNode,
SlashingProtectionDB.init(
getStateField(dag.headState, genesis_validators_root),
config.validatorsDir(), SlashingDbName)
validatorPool = newClone(ValidatorPool.init(slashingProtectionDB))
validatorPool = newClone(ValidatorPool.init(
slashingProtectionDB, config.doppelgangerDetection))

keymanagerInitResult = initKeymanagerServer(config, restServer)
keymanagerHost = if keymanagerInitResult.server != nil:
Expand Down Expand Up @@ -963,6 +964,17 @@ proc updateSyncCommitteeTopics(node: BeaconNode, slot: Slot) =

node.network.updateSyncnetsMetadata(syncnets)

proc updateDoppelganger(node: BeaconNode, epoch: Epoch) =
if not node.processor[].doppelgangerDetectionEnabled:
return

# broadcastStartEpoch is set to FAR_FUTURE_EPOCH when we're not monitoring
# gossip - it is only viable to assert liveness in epochs where gossip is
# active
if epoch > node.processor[].doppelgangerDetection.broadcastStartEpoch:
for validator in node.attachedValidators[]:
validator.updateDoppelganger(epoch - 1)

proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
## Subscribe to subnets that we are providing stability for or aggregating
## and unsubscribe from the ones that are no longer relevant.
Expand Down Expand Up @@ -1049,6 +1061,8 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
headSlot = head.slot,
headDistance

node.processor[].clearDoppelgangerProtection()

let forkDigests = node.forkDigests()

discard $eip4844ImplementationMissing & "nimbus_beacon_node.nim:updateGossipStatus check EIP4844 removeMessageHandlers"
Expand Down Expand Up @@ -1076,6 +1090,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
addMessageHandlers[gossipFork](node, forkDigests[gossipFork], slot)

node.gossipState = targetGossipState
node.updateDoppelganger(slot.epoch)
node.updateAttestationSubnetHandlers(slot)
node.updateBlocksGossipStatus(slot, isBehind)
node.updateLightClientGossipStatus(slot, isBehind)
Expand Down
13 changes: 12 additions & 1 deletion beacon_chain/nimbus_binary_common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import
chronos, confutils, presto, toml_serialization, metrics,
chronicles, chronicles/helpers as chroniclesHelpers, chronicles/topics_registry,
stew/io2,
presto,

# Local modules
./spec/[helpers],
Expand Down Expand Up @@ -413,3 +412,15 @@ proc initKeymanagerServer*(
nil

KeymanagerInitResult(server: keymanagerServer, token: token)

proc quitDoppelganger*() =
# Avoid colliding with
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes
# This error code is used to permanently shut down validators
fatal "Doppelganger detection triggered! It appears a validator loaded into " &
"this process is already live on the network - the validator is at high " &
"risk of being slashed due to the same keys being used in two setups. " &
"See https://nimbus.guide/doppelganger-detection.html for more information!"

const QuitDoppelganger = 129
quit QuitDoppelganger
5 changes: 1 addition & 4 deletions beacon_chain/nimbus_signing_node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,7 @@ proc initValidators(sn: var SigningNode): bool =
let feeRecipient = default(Eth1Address)
case keystore.kind
of KeystoreKind.Local:
# Signing node is not supposed to know genesis time, so we just set
# `start_slot` to GENESIS_SLOT.
sn.attachedValidators.addLocalValidator(
keystore, feeRecipient, GENESIS_SLOT)
discard sn.attachedValidators.addLocalValidator(keystore, feeRecipient)
publicKeyIdents.add("\"0x" & keystore.pubkey.toHex() & "\"")
of KeystoreKind.Remote:
error "Signing node do not support remote validators",
Expand Down
23 changes: 16 additions & 7 deletions beacon_chain/nimbus_validator_client.nim
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ proc new*(T: type ValidatorClientRef,
graffitiBytes: config.graffiti.get(defaultGraffitiBytes()),
nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(),
gracefulExit: newAsyncEvent(),
doppelExit: newAsyncEvent(),
indicesAvailable: newAsyncEvent(),
dynamicFeeRecipientsStore: newClone(DynamicFeeRecipientsStore.init()),
sigintHandleFut: waitSignal(SIGINT),
Expand All @@ -225,7 +225,7 @@ proc new*(T: type ValidatorClientRef,
nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(),
indicesAvailable: newAsyncEvent(),
gracefulExit: newAsyncEvent(),
doppelExit: newAsyncEvent(),
dynamicFeeRecipientsStore: newClone(DynamicFeeRecipientsStore.init()),
sigintHandleFut: newFuture[void]("sigint_placeholder"),
sigtermHandleFut: newFuture[void]("sigterm_placeholder")
Expand Down Expand Up @@ -255,7 +255,8 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {.async.} =
SlashingProtectionDB.init(
vc.beaconGenesis.genesis_validators_root,
vc.config.validatorsDir(), "slashing_protection")
validatorPool = newClone(ValidatorPool.init(slashingProtectionDB))
validatorPool = newClone(ValidatorPool.init(
slashingProtectionDB, vc.config.doppelgangerDetection))

vc.attachedValidators = validatorPool

Expand Down Expand Up @@ -315,10 +316,10 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =
vc.keymanagerServer.router.installKeymanagerHandlers(vc.keymanagerHost[])
vc.keymanagerServer.start()

var exitEventFut = vc.gracefulExit.wait()
var doppelEventFut = vc.doppelExit.wait()
try:
vc.runSlotLoopFut = runSlotLoop(vc, vc.beaconClock.now(), onSlotStart)
discard await race(vc.runSlotLoopFut, exitEventFut)
discard await race(vc.runSlotLoopFut, doppelEventFut)
if not(vc.runSlotLoopFut.finished()):
notice "Received shutdown event, exiting"
except CancelledError:
Expand All @@ -329,12 +330,20 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =

await vc.shutdownMetrics()
vc.shutdownSlashingProtection()

if doppelEventFut.finished:
# Critically, database has been shut down - the rest doesn't matter, we need
# to stop as soon as possible
# TODO we need to actually quit _before_ any other async tasks have had the
# chance to happen
quitDoppelganger()

debug "Stopping main processing loop"
var pending: seq[Future[void]]
if not(vc.runSlotLoopFut.finished()):
pending.add(vc.runSlotLoopFut.cancelAndWait())
if not(exitEventFut.finished()):
pending.add(exitEventFut.cancelAndWait())
if not(doppelEventFut.finished()):
pending.add(doppelEventFut.cancelAndWait())
debug "Stopping running services"
pending.add(vc.fallbackService.stop())
pending.add(vc.forkService.stop())
Expand Down
19 changes: 5 additions & 14 deletions beacon_chain/rpc/rest_key_management_api.nim
Original file line number Diff line number Diff line change
Expand Up @@ -127,22 +127,13 @@ proc handleAddRemoteValidatorReq(host: KeymanagerHost,
let res = importKeystore(host.validatorPool[], host.validatorsDir, keystore)
if res.isOk:
let
slot = host.getBeaconTimeFn().slotOrZero
validator = host.getValidatorData(keystore.pubkey)
data = host.getValidatorData(keystore.pubkey)
feeRecipient = host.getSuggestedFeeRecipient(keystore.pubkey).valueOr(
host.defaultFeeRecipient)
index =
if validator.isSome():
Opt.some(validator.get().index)
else:
Opt.none(ValidatorIndex)
activationEpoch =
if validator.isSome():
Opt.some(validator.get().validator.activation_epoch)
else:
Opt.none(Epoch)
host.validatorPool[].addRemoteValidator(
res.get, index, feeRecipient, slot, activationEpoch)
v = host.validatorPool[].addRemoteValidator(res.get, feeRecipient)
if data.isSome():
v.updateValidator(data.get().index, data.get().validator.activation_epoch)

RequestItemStatus(status: $KeystoreStatus.imported)
else:
case res.error().status
Expand Down
6 changes: 6 additions & 0 deletions beacon_chain/rpc/rest_validator_api.nim
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =

return RestApiResponse.response("", Http200, "text/plain")

# https://github.com/ethereum/beacon-APIs/blob/master/apis/validator/liveness.yaml
router.api(MethodPost, "/eth/v1/validator/liveness/{epoch}") do (
epoch: Epoch, contentBody: Option[ContentBody]) -> RestApiResponse:
let
Expand All @@ -878,6 +879,11 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
if (res < prevEpoch) or (res > nextEpoch):
return RestApiResponse.jsonError(Http400, InvalidEpochValueError,
"Requested epoch is more than one epoch from current epoch")

if res < node.processor[].doppelgangerDetection.broadcastStartEpoch:
# We can't accurately respond if we're not in sync and aren't
# processing gossip
return RestApiResponse.jsonError(Http503, BeaconNodeInSyncError)
res
indexList =
block:
Expand Down
45 changes: 22 additions & 23 deletions beacon_chain/validator_client/attestation_service.nim
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ type
proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
duty: DutyAndProof): Future[bool] {.async.} =
let vc = service.client
let validator = vc.getValidator(duty.data.pubkey).valueOr: return false
let validator = vc.getValidatorForDuties(duty.data.pubkey, adata.slot).valueOr:
return false
let fork = vc.forkAtEpoch(adata.slot.epoch)

doAssert(validator.index.isSome())
Expand Down Expand Up @@ -259,23 +260,25 @@ proc produceAndPublishAggregates(service: AttestationServiceRef,
block:
var res: seq[AggregateItem]
for duty in duties:
let validator = vc.attachedValidators[].getValidator(duty.data.pubkey)
if not(isNil(validator)):
if (duty.data.slot != slot) or
(duty.data.committee_index != committeeIndex):
error "Inconsistent validator duties during aggregate signing",
duty_slot = duty.data.slot, slot = slot,
duty_committee_index = duty.data.committee_index,
committee_index = committeeIndex
continue
if duty.slotSig.isSome():
let slotSignature = duty.slotSig.get()
if is_aggregator(duty.data.committee_length, slotSignature):
res.add(AggregateItem(
aggregator_index: uint64(duty.data.validator_index),
selection_proof: slotSignature,
validator: validator
))
let validator = vc.attachedValidators[].getValidatorForDuties(
duty.data.pubkey, slot).valueOr:
continue

if (duty.data.slot != slot) or
(duty.data.committee_index != committeeIndex):
error "Inconsistent validator duties during aggregate signing",
duty_slot = duty.data.slot, slot = slot,
duty_committee_index = duty.data.committee_index,
committee_index = committeeIndex
continue
if duty.slotSig.isSome():
let slotSignature = duty.slotSig.get()
if is_aggregator(duty.data.committee_length, slotSignature):
res.add(AggregateItem(
aggregator_index: uint64(duty.data.validator_index),
selection_proof: slotSignature,
validator: validator
))
res

if len(aggregateItems) > 0:
Expand Down Expand Up @@ -395,11 +398,7 @@ proc spawnAttestationTasks(service: AttestationServiceRef,

var dutiesSkipped: seq[string]
for index, duties in dutiesByCommittee:
let (protectedDuties, skipped) = vc.doppelgangerFilter(duties)
if len(skipped) > 0: dutiesSkipped.add(skipped)
if len(protectedDuties) > 0:
asyncSpawn service.publishAttestationsAndAggregates(slot, index,
protectedDuties)
asyncSpawn service.publishAttestationsAndAggregates(slot, index, duties)
if len(dutiesSkipped) > 0:
info "Doppelganger protection disabled validator duties",
validators = len(dutiesSkipped)
Expand Down
6 changes: 1 addition & 5 deletions beacon_chain/validator_client/block_service.nim
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,6 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
slot = slot
wall_slot = currentSlot

if not(vc.doppelgangerCheck(validator)):
info "Block has not been produced (doppelganger check still active)"
return

debug "Publishing block", delay = vc.getDelay(slot.block_deadline()),
genesis_root = genesisRoot,
graffiti = graffiti, fork = fork
Expand Down Expand Up @@ -316,7 +312,7 @@ proc proposeBlock(vc: ValidatorClientRef, slot: Slot,
if sres.isSome():
let
currentSlot = sres.get()
validator = vc.getValidator(proposerKey).valueOr: return
validator = vc.getValidatorForDuties(proposerKey, slot).valueOr: return
await vc.publishBlock(currentSlot, slot, validator)
except CancelledError as exc:
debug "Block proposing was interrupted", slot = slot,
Expand Down
Loading