From 23786be68cdb6f35b6919cde5af57ab70f9741ad Mon Sep 17 00:00:00 2001 From: "Steven Platt, PhD" <31355889+stevenplatt@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:26:01 -0400 Subject: [PATCH] fix: Add persistent storage for aztec nodes in the spartan cluster (#8923) This PR includes two changes: - Adds persistent storage for Aztec nodes running in the Spartan cluster - Repairs previously merged load balancer configurations # Persistent Storage Nodes that were previously configured with mounted volumes are now configured to use `volumeClaimTemplates`. Rather than directly configuring a `PersistentVolumeClaim`, a `volumeClaimTemplate` will automatically append index suffixes when replicas increase, so that there are no storage conflicts. ## Persistent Storage for Grafana The currently bundled Grafana instance uses a standard `PersistentVolumeClaim` since it is not expected to be deployed with replicas. Grafana also has an OS-level user defined in its container, which assumes ownership of the volume once it is mounted. To allow remounting, the user has to be defined in the Helm chart. This is done using a `securityContext` in the Grafana YAML template. # Repaired Load Balancer Config PR #8786 previously made network interfaces *either* internal or external. This meant that when the network was set as public, certain references to internal network interfaces were no longer reachable. This specifically affected items that address a node port ([bootNodeURL](https://github.com/AztecProtocol/aztec-packages/blob/master/spartan/aztec-network/templates/_helpers.tpl#L62), for example). This PR adds the load balancer as a second interface, without modifying the original. # Testing Code in this PR has been tested by deploying the updated Helm configurations to the Spartan cluster using the command: `helm upgrade --install staging . 
-n staging --set network.public=true` As part of this change, replica counts have also been validated to work without causing conflicts for volume mounts, network interfaces or other resources. --- spartan/aztec-network/templates/anvil.yaml | 20 ++++++++++- .../aztec-network/templates/boot-node.yaml | 35 +++++++++++++++--- spartan/aztec-network/templates/metrics.yaml | 22 ++++++++++-- .../aztec-network/templates/prover-node.yaml | 34 +++++++++++++++--- spartan/aztec-network/templates/pxe.yaml | 20 ++++++++++- .../aztec-network/templates/validator.yaml | 36 ++++++++++++++++--- spartan/aztec-network/values.yaml | 9 +++-- 7 files changed, 155 insertions(+), 21 deletions(-) diff --git a/spartan/aztec-network/templates/anvil.yaml b/spartan/aztec-network/templates/anvil.yaml index a45c52d58b5..bdaea56bdec 100644 --- a/spartan/aztec-network/templates/anvil.yaml +++ b/spartan/aztec-network/templates/anvil.yaml @@ -69,4 +69,22 @@ spec: targetPort: {{ .Values.ethereum.service.targetPort }} {{- if and (eq .Values.ethereum.service.type "NodePort") .Values.ethereum.service.nodePort }} nodePort: {{ .Values.ethereum.service.nodePort }} - {{- end }} \ No newline at end of file + {{- end }} +--- +{{if .Values.network.public }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aztec-network.fullname" . }}-ethereum-lb + labels: + {{- include "aztec-network.labels" . | nindent 4 }} +spec: + type: LoadBalancer + selector: + {{- include "aztec-network.selectorLabels" . 
| nindent 4 }} + app: ethereum + ports: + - protocol: TCP + port: {{ .Values.ethereum.service.port }} + targetPort: {{ .Values.ethereum.service.targetPort }} # nodePort is deliberately not pinned here: the internal Service may already hold it +{{ end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index 6ea619b0059..ec2b2f762ae 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -123,11 +123,19 @@ spec: resources: {{- toYaml .Values.bootNode.resources | nindent 12 }} volumes: - - name: shared-volume - emptyDir: {} - name: scripts configMap: name: {{ include "aztec-network.fullname" . }}-deploy-contracts-script + volumeClaimTemplates: + - metadata: + name: shared-volume + labels: + {{- include "aztec-network.labels" . | nindent 8 }} + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: {{ .Values.bootNode.storage }} --- apiVersion: v1 kind: Service @@ -136,7 +144,25 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} + type: ClusterIP + selector: + {{- include "aztec-network.selectorLabels" . | nindent 4 }} + app: boot-node + ports: + - port: {{ .Values.bootNode.service.p2pPort }} + name: p2p + - port: {{ .Values.bootNode.service.nodePort }} + name: node +--- +{{if .Values.network.public }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aztec-network.fullname" . }}-boot-node-lb + labels: + {{- include "aztec-network.labels" . | nindent 4 }} +spec: + type: LoadBalancer selector: {{- include "aztec-network.selectorLabels" . 
| nindent 4 }} app: boot-node @@ -144,4 +170,5 @@ spec: - port: {{ .Values.bootNode.service.p2pPort }} name: p2p - port: {{ .Values.bootNode.service.nodePort }} - name: node \ No newline at end of file + name: node +{{ end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/metrics.yaml b/spartan/aztec-network/templates/metrics.yaml index ff62fad5f2c..fc3d6d3ee15 100644 --- a/spartan/aztec-network/templates/metrics.yaml +++ b/spartan/aztec-network/templates/metrics.yaml @@ -1,4 +1,3 @@ ---- apiVersion: apps/v1 kind: Deployment metadata: @@ -17,6 +16,8 @@ spec: {{- include "aztec-network.selectorLabels" . | nindent 8 }} app: metrics spec: + securityContext: + fsGroup: {{ .Values.metrics.fileSystemUser }} # required for Grafana write access to storage volume containers: - name: otel-collector image: "{{ .Values.images.otelCollector.image }}" @@ -58,6 +59,8 @@ spec: subPath: grafana-sources.yaml - name: grafana-storage mountPath: /var/lib/grafana + securityContext: + runAsUser: {{ .Values.metrics.fileSystemUser }} # required for Grafana write access to storage volume - name: jaeger image: "{{ .Values.images.jaeger.image }}" @@ -68,8 +71,21 @@ spec: configMap: name: {{ include "aztec-network.fullname" . }}-metrics - name: grafana-storage - emptyDir: {} - + persistentVolumeClaim: + claimName: {{ include "aztec-network.fullname" . }}-grafana-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "aztec-network.fullname" . }}-grafana-pvc + labels: + app: {{ include "aztec-network.name" . 
}} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.metrics.storage }} --- apiVersion: v1 kind: Service diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index 3bdfc8effa8..dfcb8b4bbb5 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -82,12 +82,19 @@ spec: resources: {{- toYaml .Values.proverNode.resources | nindent 12 }} volumes: - - name: shared-volume - emptyDir: {} - name: scripts configMap: name: {{ include "aztec-network.fullname" . }}-configure-prover-env - + volumeClaimTemplates: + - metadata: + name: shared-volume + labels: + {{- include "aztec-network.labels" . | nindent 8 }} + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: {{ .Values.proverNode.storage }} --- apiVersion: v1 kind: ConfigMap @@ -106,10 +113,27 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} + type: ClusterIP + selector: + {{- include "aztec-network.selectorLabels" . | nindent 4 }} + app: prover-node + ports: + - port: {{ .Values.proverNode.service.nodePort }} + name: node +--- +{{if .Values.network.public }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aztec-network.fullname" . }}-prover-node-lb + labels: + {{- include "aztec-network.labels" . | nindent 4 }} +spec: + type: LoadBalancer selector: {{- include "aztec-network.selectorLabels" . 
| nindent 4 }} app: prover-node ports: - port: {{ .Values.proverNode.service.nodePort }} - name: node \ No newline at end of file + name: node +{{ end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index 52f1b8aed6a..3725960a0b4 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -66,7 +66,7 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} + type: ClusterIP selector: {{- include "aztec-network.selectorLabels" . | nindent 4 }} app: pxe @@ -77,4 +77,22 @@ spec: {{- if and (eq .Values.pxe.service.type "NodePort") .Values.pxe.service.nodePort }} nodePort: {{ .Values.pxe.service.nodePort }} {{- end }} +--- +{{if .Values.network.public }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aztec-network.fullname" . }}-pxe-lb + labels: + {{- include "aztec-network.labels" . | nindent 4 }} +spec: + type: LoadBalancer + selector: + {{- include "aztec-network.selectorLabels" . | nindent 4 }} + app: pxe + ports: + - protocol: TCP + port: {{ .Values.pxe.service.port }} + targetPort: {{ .Values.pxe.service.targetPort }} # nodePort is deliberately not pinned here: the internal Service may already hold it +{{ end }} {{- end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index 4f7785efd89..205a32a74cd 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -91,12 +91,19 @@ spec: resources: {{- toYaml .Values.validator.resources | nindent 12 }} volumes: - - name: shared-volume - emptyDir: {} - name: scripts configMap: name: {{ include "aztec-network.fullname" . 
}}-configure-validator-env - + volumeClaimTemplates: + - metadata: + name: shared-volume + labels: + {{- include "aztec-network.labels" . | nindent 8 }} + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: {{ .Values.validator.storage }} --- apiVersion: v1 kind: ConfigMap @@ -115,7 +122,25 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} + type: ClusterIP + selector: + {{- include "aztec-network.selectorLabels" . | nindent 4 }} + app: validator + ports: + - port: {{ .Values.validator.service.p2pPort }} + name: p2p + - port: {{ .Values.validator.service.nodePort }} + name: node +--- +{{if .Values.network.public }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aztec-network.fullname" . }}-validator-lb + labels: + {{- include "aztec-network.labels" . | nindent 4 }} +spec: + type: LoadBalancer selector: {{- include "aztec-network.selectorLabels" . 
| nindent 4 }} app: validator @@ -123,4 +148,5 @@ spec: - port: {{ .Values.validator.service.p2pPort }} name: p2p - port: {{ .Values.validator.service.nodePort }} - name: node \ No newline at end of file + name: node +{{ end }} \ No newline at end of file diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index 228a3750b99..588eac5c34a 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -46,9 +46,10 @@ bootNode: limits: memory: "4Gi" cpu: "4" + storage: "8Gi" validator: - replicas: 0 + replicas: 1 service: p2pPort: 40400 nodePort: 8080 @@ -68,6 +69,7 @@ validator: limits: memory: "28Gi" cpu: "7" + storage: "8Gi" proverNode: replicas: 1 @@ -84,6 +86,7 @@ proverNode: limits: memory: "120Gi" cpu: "15" + storage: "8Gi" pxe: logLevel: "debug" @@ -176,4 +179,6 @@ metrics: periodSeconds: 10 timeoutSeconds: 5 successThreshold: 1 - failureThreshold: 3 \ No newline at end of file + failureThreshold: 3 + storage: "8Gi" + fileSystemUser: 472 \ No newline at end of file