Skip to content

Commit

Permalink
feat: gating-passive.test.ts
Browse files Browse the repository at this point in the history
This test:
- updates the aztec network deployment, allowing validators to use each other as boot nodes
- applies the "network-requirements" network shaping
- permanently disables the boot node
- runs 3 epochs during which it:
  - kills 25% of the validators
  - asserts that we miss fewer than 50% of slots

Other work in this branch includes:
- add `ignoreDroppedReceiptsFor` TX wait options
  - this allows sending a TX to one node, and awaiting it on another since we need time for p2p propagation
  - we need this since we have shifted the PXE to point at the top-level validator service, which load balances across individuals
  - this may help with #9613
- scalable loki deployment for prod
- more visible logging for core sequencer operations
- better error handling during the setup of l2 contracts
- better error handling in the pxe
- rename the network shaping charts to "aztec-chaos-scenarios"
  • Loading branch information
just-mitch committed Nov 15, 2024
1 parent 10c64fc commit 58ce04b
Show file tree
Hide file tree
Showing 49 changed files with 768 additions and 238 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,10 @@ jobs:
values: 16-validators
runner_type: 16core-tester-x86
timeout: 60
- test: gating-passive.test.ts
values: 16-validators
runner_type: 16core-tester-x86
timeout: 60
steps:
- uses: actions/checkout@v4
with: { ref: "${{ env.GIT_COMMIT }}" }
Expand Down
6 changes: 6 additions & 0 deletions spartan/aztec-chaos-scenarios/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Helm chart metadata for the aztec-chaos-scenarios chart.
apiVersion: v2
name: aztec-chaos-scenarios
# Chart version and the version of the application it packages.
version: 0.1.0
appVersion: '1.0.0'
type: application
description: Chaos scenarios for spartan using chaos-mesh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{/*
Create a default fully qualified app name.
*/}}
{{- define "network-shaping.fullname" -}}
{{- define "aztec-chaos-scenarios.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
Expand All @@ -14,17 +14,12 @@ Create a default fully qualified app name.
{{- end }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "chaos-mesh.selectorLabels" -}}
{{- end }}

{{/*
Common labels
*/}}
{{- define "network-shaping.labels" -}}
app.kubernetes.io/name: {{ include "network-shaping.fullname" . }}
{{- define "aztec-chaos-scenarios.labels" -}}
app.kubernetes.io/name: {{ include "aztec-chaos-scenarios.fullname" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
Expand Down
21 changes: 21 additions & 0 deletions spartan/aztec-chaos-scenarios/templates/boot-node-failure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
PodChaos experiment applying the "pod-failure" action, in mode "all", to pods
labelled app=boot-node in .Values.global.targetNamespace.
Rendered only when .Values.bootNodeFailure.enabled is truthy.
*/ -}}
{{- if .Values.bootNodeFailure.enabled }}
---
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: {{ .Values.global.targetNamespace }}-boot-node-failure
  namespace: {{ .Values.global.chaosMeshNamespace }}
  labels:
    {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
  annotations:
    # Ask Helm not to delete this resource when an operation would remove it.
    "helm.sh/resource-policy": keep
spec:
  action: pod-failure
  mode: all
  selector:
    namespaces:
      # Quoted so a number-like namespace name still renders as a YAML string.
      - {{ .Values.global.targetNamespace | quote }}
    labelSelectors:
      app: boot-node
  # Quoted so the duration always renders as a YAML string.
  duration: {{ .Values.bootNodeFailure.duration | quote }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
name: {{ .Values.global.targetNamespace }}-latency
namespace: {{ .Values.global.chaosMeshNamespace }}
labels:
{{- include "network-shaping.labels" . | nindent 4 }}
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
Expand All @@ -31,7 +31,7 @@ metadata:
name: {{ .Values.global.targetNamespace }}-bandwidth
namespace: {{ .Values.global.chaosMeshNamespace }}
labels:
{{- include "network-shaping.labels" . | nindent 4 }}
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
Expand All @@ -57,7 +57,7 @@ metadata:
name: {{ .Values.global.targetNamespace }}-packet-loss
namespace: {{ .Values.global.chaosMeshNamespace }}
labels:
{{- include "network-shaping.labels" . | nindent 4 }}
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
annotations:
"helm.sh/resource-policy": keep
spec:
Expand All @@ -72,26 +72,4 @@ spec:
duration: 8760h
{{- end }}

{{- if .Values.networkShaping.conditions.killProvers.enabled }}
---
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
name: {{ .Values.global.targetNamespace }}-kill-provers
namespace: {{ .Values.global.chaosMeshNamespace }}
labels:
{{- include "network-shaping.labels" . | nindent 4 }}
annotations:
"helm.sh/resource-policy": keep
spec:
action: pod-failure
mode: all
selector:
namespaces:
- {{ .Values.global.targetNamespace }}
labelSelectors:
app: prover-node
duration: {{ .Values.networkShaping.conditions.killProvers.duration }}
{{- end }}

{{- end }}
21 changes: 21 additions & 0 deletions spartan/aztec-chaos-scenarios/templates/prover-failure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
PodChaos experiment applying the "pod-failure" action, in mode "all", to pods
labelled app=prover-node in .Values.global.targetNamespace.
Rendered only when .Values.proverFailure.enabled is truthy.
*/ -}}
{{- if .Values.proverFailure.enabled }}
---
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: {{ .Values.global.targetNamespace }}-prover-failure
  namespace: {{ .Values.global.chaosMeshNamespace }}
  labels:
    {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
  annotations:
    # Ask Helm not to delete this resource when an operation would remove it.
    "helm.sh/resource-policy": keep
spec:
  action: pod-failure
  mode: all
  selector:
    namespaces:
      # Quoted so a number-like namespace name still renders as a YAML string.
      - {{ .Values.global.targetNamespace | quote }}
    labelSelectors:
      app: prover-node
  # Quoted so the duration always renders as a YAML string.
  duration: {{ .Values.proverFailure.duration | quote }}
{{- end }}
21 changes: 21 additions & 0 deletions spartan/aztec-chaos-scenarios/templates/validator-kill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
PodChaos experiment applying the "pod-kill" action to a fixed percentage
(.Values.validatorKill.percent) of the pods labelled app=validator in
.Values.global.targetNamespace.
Rendered only when .Values.validatorKill.enabled is truthy.
*/ -}}
{{- if .Values.validatorKill.enabled }}
---
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: {{ .Values.global.targetNamespace }}-validator-kill
  namespace: {{ .Values.global.chaosMeshNamespace }}
  labels:
    {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
  annotations:
    # Ask Helm not to delete this resource when an operation would remove it.
    "helm.sh/resource-policy": keep
spec:
  action: pod-kill
  mode: fixed-percent
  # Rendered as a quoted string even though the values file holds a number.
  value: {{ .Values.validatorKill.percent | quote }}
  selector:
    namespaces:
      # Quoted so a number-like namespace name still renders as a YAML string.
      - {{ .Values.global.targetNamespace | quote }}
    labelSelectors:
      app: validator
{{- end }}
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
nameOverride: null
fullnameOverride: null

global:
# When deploying, override the namespace with the one spartan deploys to; all chaos experiments are applied to all pods within that namespace.
# Run the deployment with --set global.targetNamespace=your-namespace
Expand All @@ -7,12 +10,12 @@ global:
# Network shaping configuration
networkShaping:
# Master switch to enable network shaping
enabled: true
enabled: false

# Default settings
defaultSettings:
mode: all
# Set duration to 1 year so the the experiment will run indefinitely unless overridden
# Set duration to 1 year so the experiment will run indefinitely unless overridden
duration: 8760h

# Network conditions to apply
Expand Down Expand Up @@ -62,9 +65,17 @@ networkShaping:
# Buffer = smoother bandwidth restriction but higher memory usage
buffer: 1000

killProvers:
enabled: false
duration: 13m
proverFailure:
enabled: false
duration: 13m

validatorKill:
enabled: false
percent: 30

bootNodeFailure:
enabled: false
duration: 60m
## Here are some exciting example configurations created by claude:
# Example use cases for different configurations:

Expand Down
6 changes: 6 additions & 0 deletions spartan/aztec-chaos-scenarios/values/boot-node-failure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Scenario values that enable the boot-node-failure PodChaos experiment.
global:
  # The boot-node-failure template selects pods via global.targetNamespace,
  # so the namespace has to be set under that key to take effect.
  targetNamespace: "smoke"
  # NOTE(review): no template visible in this chart reads global.namespace;
  # kept for backward compatibility. Confirm and remove if unused.
  namespace: "smoke"

bootNodeFailure:
  enabled: true
  duration: 60m
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ networkShaping:
packetLoss:
enabled: true
loss: "0.5"
correlation: "60"
correlation: "60"
23 changes: 23 additions & 0 deletions spartan/aztec-chaos-scenarios/values/network-requirements.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Imposes the network conditions that are stated as requirements for node operators
global:
  # The network-shaping resources are named from global.targetNamespace, so
  # the namespace has to be set under that key to take effect.
  targetNamespace: "smoke"
  # NOTE(review): no template visible in this chart reads global.namespace;
  # kept for backward compatibility. Confirm and remove if unused.
  namespace: "smoke"

networkShaping:
  enabled: true
  conditions:
    latency:
      enabled: true
      delay:
        # Regional network latency (e.g., cross-country)
        latency: 100ms
        jitter: 20ms
        correlation: "75"
    bandwidth:
      enabled: true
      rate: 250mbps
      limit: 125000000
      buffer: 25000
    packetLoss:
      enabled: true
      loss: "0.5"
      correlation: "60"
6 changes: 6 additions & 0 deletions spartan/aztec-chaos-scenarios/values/prover-failure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Scenario values that enable the prover-failure PodChaos experiment.
global:
  # The prover-failure template selects pods via global.targetNamespace,
  # so the namespace has to be set under that key to take effect.
  targetNamespace: "smoke"
  # NOTE(review): no template visible in this chart reads global.namespace;
  # kept for backward compatibility. Confirm and remove if unused.
  namespace: "smoke"

proverFailure:
  enabled: true
  duration: 13m
File renamed without changes.
6 changes: 6 additions & 0 deletions spartan/aztec-chaos-scenarios/values/validator-kill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Scenario values that enable the validator-kill PodChaos experiment.
global:
  # The validator-kill template selects pods via global.targetNamespace,
  # so the namespace has to be set under that key to take effect.
  targetNamespace: "smoke"
  # NOTE(review): no template visible in this chart reads global.namespace;
  # kept for backward compatibility. Confirm and remove if unused.
  namespace: "smoke"

validatorKill:
  enabled: true
  # Percentage of validator pods to kill; the template renders it as a string.
  percent: 25
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ http://{{ include "aztec-network.fullname" . }}-boot-node-0.{{ include "aztec-ne
{{- if .Values.validator.externalTcpHost -}}
http://{{ .Values.validator.externalTcpHost }}:{{ .Values.validator.service.nodePort }}
{{- else -}}
http://{{ include "aztec-network.fullname" . }}-validator-0.{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }}
http://{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }}
{{- end -}}
{{- end -}}

Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
app: prover-node
spec:
initContainers:
- name: wait-for-boot-node
- name: wait-for-services
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
- name: ETHEREUM_HOST
value: {{ include "aztec-network.ethereumHost" . | quote }}
- name: AZTEC_NODE_URL
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
value: {{ include "aztec-network.validatorUrl" . | quote }}
- name: LOG_JSON
value: "1"
- name: LOG_LEVEL
Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/setup-l2-contracts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ spec:
echo "L2 contracts initialized"
env:
- name: PXE_URL
value: {{ include "aztec-network.pxeUrl" . | quote }}
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
- name: DEBUG
value: "aztec:*"
- name: LOG_LEVEL
Expand Down
4 changes: 2 additions & 2 deletions spartan/aztec-network/templates/transaction-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
{{- if .Values.bot.nodeUrl }}
value: "{{ .Values.bot.nodeUrl }}"
{{- else }}
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
value: {{ include "aztec-network.validatorUrl" . | quote }}
{{- end }}
command:
- /bin/sh
Expand All @@ -47,7 +47,7 @@ spec:
{{- if .Values.bot.nodeUrl }}
value: "{{ .Values.bot.nodeUrl }}"
{{- else }}
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
value: {{ include "aztec-network.validatorUrl" . | quote }}
{{- end }}
- name: LOG_JSON
value: "1"
Expand Down
Loading

0 comments on commit 58ce04b

Please sign in to comment.