From 5851da6f388d077777a714fc9e5d574c1565b009 Mon Sep 17 00:00:00 2001
From: Jeff Zellner
Date: Thu, 12 Sep 2024 16:14:45 -0600
Subject: [PATCH] primary-site: add KEDA based autoscaling support to inbox-listener (#92)

### Changelog

- Feature: add support for built-in inbox-listener autoscaling, see
[documentation](https://docs.foxglove.dev/docs/primary-sites/self-hosting/autoscaling#inbox-listener)
for details

### Docs

https://docs.foxglove.dev/docs/primary-sites/self-hosting/autoscaling#inbox-listener

### Description

Autoscaling can now be enabled automatically for inbox-listener. Requires KEDA to be installed as a
prerequisite.
---
 .../deployments/_inbox-container.tpl          | 133 ++++++++++++++++
 .../templates/deployments/inbox-listener.yaml | 145 +++---------------
 .../templates/services/site-controller.yaml   |  18 +++
 charts/primary-site/values.yaml               |  19 +++
 4 files changed, 197 insertions(+), 118 deletions(-)
 create mode 100644 charts/primary-site/templates/deployments/_inbox-container.tpl
 create mode 100644 charts/primary-site/templates/services/site-controller.yaml

diff --git a/charts/primary-site/templates/deployments/_inbox-container.tpl b/charts/primary-site/templates/deployments/_inbox-container.tpl
new file mode 100644
index 0000000..18f5058
--- /dev/null
+++ b/charts/primary-site/templates/deployments/_inbox-container.tpl
@@ -0,0 +1,133 @@
+{{- /*
+primary-site.inbox-container renders the `template:` stanza (pod metadata and spec) shared by
+the inbox-listener Deployment and the KEDA ScaledJob in inbox-listener.yaml. Include it with the
+root context and pipe the result through nindent to nest it under the caller's spec.
+*/ -}}
+{{- define "primary-site.inbox-container" }}
+template:
+  metadata:
+    labels:
+      app: inbox-listener
+      {{- range $key, $value := .Values.inboxListener.deployment.podLabels }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+    annotations:
+      {{- range $key, $value := .Values.inboxListener.deployment.podAnnotations }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+  spec:
+    {{- if .Values.inboxListener.autoscaling.enabled }}
+    ## Job pods must use Never or OnFailure; the pod default (Always) only suits the Deployment.
+    restartPolicy: Never
+    {{- end }}
+    volumes:
+      - name: cloud-credentials
+        secret:
+          secretName: gcp-cloud-credential
+          optional: true
+      {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+      - name: local-scratch
+        emptyDir:
+          sizeLimit: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+      {{- end }}
+    {{- if .Values.inboxListener.deployment.nodeSelectors }}
+    nodeSelector:
+      {{- range $key, $value := .Values.inboxListener.deployment.nodeSelectors }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+    {{- end}}
+    {{- if .Values.inboxListener.deployment.serviceAccount.enabled }}
+    serviceAccount: inbox-listener
+    {{- end}}
+    containers:
+      - name: inbox-listener
+        image: us-central1-docker.pkg.dev/foxglove-images/images/inbox-listener:{{ .Chart.AppVersion }}
+        resources:
+          requests:
+            cpu: {{ .Values.inboxListener.deployment.resources.requests.cpu }}
+            memory: {{ .Values.inboxListener.deployment.resources.requests.memory }}
+            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+            ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+            {{- end}}
+          limits:
+            cpu: {{ .Values.inboxListener.deployment.resources.limits.cpu }}
+            memory: {{ .Values.inboxListener.deployment.resources.limits.memory }}
+            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+            ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+            {{- end}}
+        volumeMounts:
+          - mountPath: /secrets
+            name: cloud-credentials
+          {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+          - mountPath: /local-scratch
+            name: local-scratch
+          {{- end }}
+        ports:
+          - name: metrics
+            containerPort: 6001
+        envFrom:
+          - secretRef:
+              name: cloud-credentials
+              optional: true
+          - secretRef:
+              name: foxglove-site-token
+              optional: true
+          {{- range $k := .Values.globals.secrets }}
+          - secretRef:
+              name: {{ $k }}
+          {{- end }}
+        env:
+          {{ with lookup "v1" "Secret" .Release.Namespace "gcp-cloud-credential" }}
+          ## The lookup is required here. The pod may have access to GCP through other means, but
+          ## the credentials in this env var take precedence, even if it's empty. An empty variable
+          ## essentially blocks GCP access.
+          - name: GOOGLE_APPLICATION_CREDENTIALS
+            value: /secrets/credentials.json
+          {{ end }}
+          - name: FOXGLOVE_API_URL
+            value: "{{ .Values.globals.foxgloveApiUrl }}"
+          {{- if .Values.globals.siteToken }}
+          - name: FOXGLOVE_SITE_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: foxglove-site
+                key: token
+                optional: false
+          {{- end }}
+          - name: MODE
+            value: self-managed
+          - name: INBOX_STORAGE_PROVIDER
+            value: "{{ .Values.globals.inbox.storageProvider }}"
+          - name: STORAGE_INBOX_BUCKET_NAME
+            value: "{{ .Values.globals.inbox.bucketName }}"
+          - name: LAKE_STORAGE_PROVIDER
+            value: "{{ .Values.globals.lake.storageProvider }}"
+          - name: STORAGE_LAKE_BUCKET_NAME
+            value: "{{ .Values.globals.lake.bucketName }}"
+          - name: STORAGE_AZURE_STORAGE_ACCOUNT_NAME
+            value: "{{ .Values.globals.azure.storageAccountName }}"
+          - name: STORAGE_AZURE_SERVICE_URL
+            value: "{{ .Values.globals.azure.serviceUrl }}"
+          - name: AWS_REGION
+            value: "{{ .Values.globals.aws.region }}"
+          - name: AWS_SDK_LOAD_CONFIG
+            value: "true"
+          - name: PROMETHEUS_METRICS_NAMESPACE
+            value: "{{ .Values.inboxListener.deployment.metrics.namespace }}"
+          - name: PROMETHEUS_METRICS_SUBSYSTEM
+            value: "{{ .Values.inboxListener.deployment.metrics.subsystem }}"
+          {{- range $item := .Values.inboxListener.deployment.env }}
+          - name: {{ $item.name }}
+            value: {{ $item.value | quote}}
+          {{- end }}
+          {{- if .Values.inboxListener.autoscaling.enabled }}
+          - name: MAX_WAIT_FOR_WORK
+            value: {{ .Values.inboxListener.autoscaling.maxWaitForWork | quote }}
+          {{- end }}
+          {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+          - name: LOCAL_SCRATCH_ROOT
+            value: "/local-scratch"
+          - name: LOCAL_SCRATCH_CAPACITY_BYTES
+            value: "{{ .Values.inboxListener.deployment.localScratch.capacityBytes }}"
+          {{- end }}
+{{- end -}}
diff --git a/charts/primary-site/templates/deployments/inbox-listener.yaml b/charts/primary-site/templates/deployments/inbox-listener.yaml
index b864550..6a383e8 100644
--- a/charts/primary-site/templates/deployments/inbox-listener.yaml
+++ b/charts/primary-site/templates/deployments/inbox-listener.yaml
@@ -1,3 +1,28 @@
+{{- if .Values.inboxListener.autoscaling.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledJob
+metadata:
+  name: inbox-listener-scaledjob
+spec:
+  minReplicaCount: {{ .Values.inboxListener.autoscaling.minReplicas }}
+  maxReplicaCount: {{ .Values.inboxListener.autoscaling.maxReplicas }}
+  successfulJobsHistoryLimit: 50
+  failedJobsHistoryLimit: 100
+  pollingInterval: 30
+  jobTargetRef:
+    parallelism: 1
+    activeDeadlineSeconds: 86400
+    {{ include "primary-site.inbox-container" . | nindent 4 }}
+  ## Scale on the site-controller's <namespace>_<subsystem>_unleased_pending_import_count metric,
+  ## read in Prometheus format from the site-controller Service.
+  triggers:
+    - type: metrics-api
+      metadata:
+        format: "prometheus"
+        targetValue: "2"
+        url: "http://site-controller.{{.Release.Namespace}}.svc.cluster.local:6001/metrics"
+        valueLocation: "{{ with .Values.siteController.deployment.metrics.namespace }}{{ . }}{{else}}foxglove_data_platform{{ end }}_{{ with .Values.siteController.deployment.metrics.subsystem }}{{ . }}{{else}}site_controller{{ end }}_unleased_pending_import_count"
+{{- else }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -14,121 +39,5 @@ spec:
       maxSurge: 25%
       maxUnavailable: 25%
     type: RollingUpdate
-  template:
-    metadata:
-      labels:
-        app: inbox-listener
-        {{- range $key, $value := .Values.inboxListener.deployment.podLabels }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-      annotations:
-        {{- range $key, $value := .Values.inboxListener.deployment.podAnnotations }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-    spec:
-      volumes:
-        - name: cloud-credentials
-          secret:
-            secretName: gcp-cloud-credential
-            optional: true
-        {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-        - name: local-scratch
-          emptyDir:
-            sizeLimit: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-        {{- end }}
-      containers:
-        - name: inbox-listener
-          image: us-central1-docker.pkg.dev/foxglove-images/images/inbox-listener:{{ .Chart.AppVersion }}
-          resources:
-            requests:
-              cpu: {{ .Values.inboxListener.deployment.resources.requests.cpu }}
-              memory: {{ .Values.inboxListener.deployment.resources.requests.memory }}
-              {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-              ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-              {{- end}}
-            limits:
-              cpu: {{ .Values.inboxListener.deployment.resources.limits.cpu }}
-              memory: {{ .Values.inboxListener.deployment.resources.limits.memory }}
-              {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-              ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-              {{- end}}
-          volumeMounts:
-            - mountPath: /secrets
-              name: cloud-credentials
-            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-            - mountPath: /local-scratch
-              name: local-scratch
-            {{- end }}
-          ports:
-            - name: metrics
-              containerPort: 6001
-          envFrom:
-            - secretRef:
-                name: cloud-credentials
-                optional: true
-            - secretRef:
-                name: foxglove-site-token
-                optional: true
-            {{- range $k := .Values.globals.secrets }}
-            - secretRef:
-                name: {{ $k }}
-            {{- end }}
-          env:
-            {{ with lookup "v1" "Secret" .Release.Namespace "gcp-cloud-credential" }}
-            ## The lookup is required here. The pod may have access to GCP through other means, but
-            ## the credentials in this env var take precedence, even if it's empty. An empty variable
-            ## essentially blocks GCP access.
-            - name: GOOGLE_APPLICATION_CREDENTIALS
-              value: /secrets/credentials.json
-            {{ end }}
-            - name: FOXGLOVE_API_URL
-              value: "{{ .Values.globals.foxgloveApiUrl }}"
-            {{- if .Values.globals.siteToken }}
-            - name: FOXGLOVE_SITE_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: foxglove-site
-                  key: token
-                  optional: false
-            {{- end }}
-            - name: MODE
-              value: self-managed
-            - name: INBOX_STORAGE_PROVIDER
-              value: "{{ .Values.globals.inbox.storageProvider }}"
-            - name: STORAGE_INBOX_BUCKET_NAME
-              value: "{{ .Values.globals.inbox.bucketName }}"
-            - name: LAKE_STORAGE_PROVIDER
-              value: "{{ .Values.globals.lake.storageProvider }}"
-            - name: STORAGE_LAKE_BUCKET_NAME
-              value: "{{ .Values.globals.lake.bucketName }}"
-            - name: STORAGE_AZURE_STORAGE_ACCOUNT_NAME
-              value: "{{ .Values.globals.azure.storageAccountName }}"
-            - name: STORAGE_AZURE_SERVICE_URL
-              value: "{{ .Values.globals.azure.serviceUrl }}"
-            - name: AWS_REGION
-              value: "{{ .Values.globals.aws.region }}"
-            - name: AWS_SDK_LOAD_CONFIG
-              value: "true"
-            - name: PROMETHEUS_METRICS_NAMESPACE
-              value: "{{ .Values.inboxListener.deployment.metrics.namespace }}"
-            - name: PROMETHEUS_METRICS_SUBSYSTEM
-              value: "{{ .Values.inboxListener.deployment.metrics.subsystem }}"
-            {{- range $item := .Values.inboxListener.deployment.env }}
-            - name: {{ $item.name }}
-              value: {{ $item.value | quote}}
-            {{- end }}
-            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-            - name: LOCAL_SCRATCH_ROOT
-              value: "/local-scratch"
-            - name: LOCAL_SCRATCH_CAPACITY_BYTES
-              value: "{{ .Values.inboxListener.deployment.localScratch.capacityBytes }}"
-            {{- end }}
-      {{- if .Values.inboxListener.deployment.serviceAccount.enabled }}
-      serviceAccount: inbox-listener
-      {{- end}}
-      {{- if .Values.inboxListener.deployment.nodeSelectors }}
-      nodeSelector:
-        {{- range $key, $value := .Values.inboxListener.deployment.nodeSelectors }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-      {{- end}}
+  {{ include "primary-site.inbox-container" . | nindent 2 }}
+{{- end }}
diff --git a/charts/primary-site/templates/services/site-controller.yaml b/charts/primary-site/templates/services/site-controller.yaml
new file mode 100644
index 0000000..90ca2bc
--- /dev/null
+++ b/charts/primary-site/templates/services/site-controller.yaml
@@ -0,0 +1,18 @@
+## Exposes site-controller metrics in-cluster; the inbox-listener KEDA trigger reads this Service.
+apiVersion: v1
+kind: Service
+metadata:
+  name: site-controller
+  annotations:
+    {{- range $key, $value := .Values.siteController.service.annotations }}
+    {{ $key }}: {{ $value | quote }}
+    {{- end }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: metrics
+      port: 6001
+      protocol: TCP
+      targetPort: 6001
+  selector:
+    app: site-controller
diff --git a/charts/primary-site/values.yaml b/charts/primary-site/values.yaml
index 7351f58..68fba9d 100644
--- a/charts/primary-site/values.yaml
+++ b/charts/primary-site/values.yaml
@@ -97,6 +97,23 @@ inboxListener:
       ## annotations:
       ## eks.amazonaws.com/role-arn: arn:aws:iam::xxxxxxxxxxxx:role/foxglove-inbox-listener-sa-role

+  # To enable the autoscaling built into this chart, you must install KEDA first
+  # helm repo add kedacore https://kedacore.github.io/charts
+  # helm repo update
+  # helm install keda kedacore/keda --namespace keda --create-namespace
+  autoscaling:
+    enabled: false
+    # minReplicas can be raised if time to start processing incoming files is slower than desired
+    # 1 is a good default for almost all use-cases
+    minReplicas: 1
+    # maxReplicas can be raised if you constantly have a very large number of incoming files to process
+    # it should be set to a value that allows your site to process incoming files at peak load
+    maxReplicas: 10
+    # This value, supplied as a duration string (https://pkg.go.dev/time#ParseDuration), determines
+    # how long a pod will wait for new work items. It is unlikely that this value should be changed.
+    # It should only be set when using this autoscaling.
+    maxWaitForWork: "30s"
+
 streamService:
   service:
     annotations: {}
@@ -127,6 +144,8 @@ streamService:
       ## eks.amazonaws.com/role-arn: arn:aws:iam::xxxxxxxxxxxx:role/foxglove-stream-service-sa-role

 siteController:
+  service:
+    annotations: {}
   deployment:
     resources:
       requests: