Skip to content

Commit

Permalink
second pass at zone awareness
Browse files Browse the repository at this point in the history
adding rollout operator
persisting tokens
setting replica counts and max unavailable

Signed-off-by: Edward Welch <edward.welch@grafana.com>
  • Loading branch information
slim-bean committed Mar 5, 2024
1 parent 5e27c1f commit 86888d4
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 15 deletions.
7 changes: 5 additions & 2 deletions production/helm/loki/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ dependencies:
- name: grafana-agent-operator
repository: https://grafana.github.io/helm-charts
version: 0.2.16
digest: sha256:56eeb13a669bc816c1452cde5d6dddc61f6893f8aff3da1d2b56ce3bdcbcf84d
generated: "2023-11-09T12:22:25.317696-03:00"
- name: rollout-operator
repository: https://grafana.github.io/helm-charts
version: 0.13.0
digest: sha256:ce0df9e286933f30653da8be12efea8e1549acdf10a527e459a2fa5ac3ef1636
generated: "2024-03-04T14:50:50.223409936-05:00"
5 changes: 5 additions & 0 deletions production/helm/loki/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ dependencies:
version: 0.2.16
repository: https://grafana.github.io/helm-charts
condition: monitoring.selfMonitoring.grafanaAgent.installOperator
- name: rollout-operator
alias: rollout_operator
repository: https://grafana.github.io/helm-charts
version: 0.13.0
condition: rollout_operator.enabled
maintainers:
- name: trevorwhitney
- name: jeschkies
18 changes: 18 additions & 0 deletions production/helm/loki/templates/ingester/_helpers-ingester.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,21 @@ livenessProbe:
{{- end }}
{{- end }}
{{- end -}}

{{/*
Number of ingester replicas to run in a single zone: the configured total
(.Values.ingester.replicas) divided by 3, rounded up to a whole number.
Expects the global context.
*/}}
{{- define "loki.ingester.replicaCount" -}}
{{- divf .Values.ingester.replicas 3 | ceil -}}
{{- end -}}

{{/*
Number of ingesters in one zone that may be unavailable at once: the zone's
replica count multiplied by maxUnavailablePct / 100, rounded up.
Expects a dict:
{
  "replicas": replicas in a zone,
  "ctx": global context
}
*/}}
{{- define "loki.ingester.maxUnavailable" -}}
{{- $pct := int .ctx.Values.ingester.zoneAwareReplication.maxUnavailablePct -}}
{{- $fraction := divf $pct 100 -}}
{{- mulf .replicas $fraction | ceil -}}
{{- end -}}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}}
{{- if and $isDistributed .Values.ingester.zoneAwareReplication.enabled }}
{{- $replicas := (include "loki.ingester.replicaCount" .) -}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
Expand All @@ -10,13 +11,14 @@ metadata:
app.kubernetes.io/part-of: memberlist
rollout-group: ingester
name: ingester-zone-a
{{- with .Values.loki.annotations }}
annotations:
rollout-max-unavailable: "{{ include "loki.ingester.maxUnavailable" (dict "ctx" . "replicas" $replicas)}}"
{{- with .Values.loki.annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
spec:
{{- if not .Values.ingester.autoscaling.enabled }}
replicas: {{ .Values.ingester.replicas }}
replicas: {{ $replicas }}
{{- end }}
podManagementPolicy: Parallel
serviceName: {{ include "loki.ingesterFullname" . }}-zone-a
Expand Down Expand Up @@ -93,6 +95,8 @@ spec:
args:
- -config.file=/etc/loki/config/config.yaml
- -ingester.availability-zone=zone-a
- -ingester.unregister-on-shutdown=false
- -ingester.tokens-file-path=/var/loki/ring-tokens
- -target=ingester
{{- with .Values.ingester.extraArgs }}
{{- toYaml . | nindent 12 }}
Expand Down Expand Up @@ -153,7 +157,7 @@ spec:
operator: NotIn
values:
- ingester-zone-a
topologyKey: {{ .Values.ingester.zoneAwareReplication.topologyKey }}
topologyKey: kubernetes.io/hostname
{{- with .Values.ingester.zoneAwareReplication.zoneA.extraAffinity }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}}
{{- if and $isDistributed .Values.ingester.zoneAwareReplication.enabled }}
{{- $replicas := (include "loki.ingester.replicaCount" .) -}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
Expand All @@ -10,13 +11,14 @@ metadata:
app.kubernetes.io/part-of: memberlist
rollout-group: ingester
name: ingester-zone-b
{{- with .Values.loki.annotations }}
annotations:
rollout-max-unavailable: "{{ include "loki.ingester.maxUnavailable" (dict "ctx" . "replicas" $replicas)}}"
{{- with .Values.loki.annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
spec:
{{- if not .Values.ingester.autoscaling.enabled }}
replicas: {{ .Values.ingester.replicas }}
replicas: {{ $replicas }}
{{- end }}
podManagementPolicy: Parallel
serviceName: {{ include "loki.ingesterFullname" . }}-zone-b
Expand Down Expand Up @@ -93,6 +95,8 @@ spec:
args:
- -config.file=/etc/loki/config/config.yaml
- -ingester.availability-zone=zone-b
- -ingester.unregister-on-shutdown=false
- -ingester.tokens-file-path=/var/loki/ring-tokens
- -target=ingester
{{- with .Values.ingester.extraArgs }}
{{- toYaml . | nindent 12 }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}}
{{- if and $isDistributed .Values.ingester.zoneAwareReplication.enabled }}
{{- $replicas := (include "loki.ingester.replicaCount" .) -}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
Expand All @@ -10,13 +11,14 @@ metadata:
app.kubernetes.io/part-of: memberlist
rollout-group: ingester
name: ingester-zone-c
{{- with .Values.loki.annotations }}
annotations:
rollout-max-unavailable: "{{ include "loki.ingester.maxUnavailable" (dict "ctx" . "replicas" $replicas)}}"
{{- with .Values.loki.annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
spec:
{{- if not .Values.ingester.autoscaling.enabled }}
replicas: {{ .Values.ingester.replicas }}
replicas: {{ $replicas }}
{{- end }}
podManagementPolicy: Parallel
serviceName: {{ include "loki.ingesterFullname" . }}-zone-c
Expand Down Expand Up @@ -93,6 +95,8 @@ spec:
args:
- -config.file=/etc/loki/config/config.yaml
- -ingester.availability-zone=zone-c
- -ingester.unregister-on-shutdown=false
- -ingester.tokens-file-path=/var/loki/ring-tokens
- -target=ingester
{{- with .Values.ingester.extraArgs }}
{{- toYaml . | nindent 12 }}
Expand Down
44 changes: 41 additions & 3 deletions production/helm/loki/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,8 @@ backend:

# -- Configuration for the ingester
ingester:
# -- Number of replicas for the ingester
# -- Number of replicas for the ingester. When zoneAwareReplication.enabled is true, this is the total across all
# zones: each of the 3 zones runs ceil(replicas / 3) ingesters, so the total is rounded up to a multiple of 3.
replicas: 0
# -- hostAliases to add
hostAliases: []
Expand Down Expand Up @@ -1570,24 +1571,34 @@ ingester:
# -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https"
grpc: ""
# -- Enabling zone awareness on ingesters will create 3 statefulsets where all writes will send a replica to each zone.
# This is primarily intended to accellerate rollout operations by allowing for multiple ingesters within a single
# This is primarily intended to accelerate rollout operations by allowing for multiple ingesters within a single
# zone to be shut down and restarted simultaneously (the remaining 2 zones are guaranteed to have at least one copy
# of the data).
# Note: This can be used to run Loki over multiple cloud provider availability zones; however, this is not currently
# recommended, as Loki is not optimized for this and cross-zone network traffic costs can become extremely high
# extremely quickly. Even with zone awareness enabled, it is recommended to run Loki in a single availability zone.
zoneAwareReplication:
# -- Enable zone awareness.
enabled: true
topologyKey: 'kubernetes.io/hostname'
# -- The percentage (0-100) of replicas in each zone that will be restarted at once.
maxUnavailablePct: 33
# -- zoneA configuration
zoneA:
# -- optionally define a node selector for this zone
nodeSelector: null
# -- optionally define extra affinity rules, by default different zones are not allowed to schedule on the same host
extraAffinity: {}
zoneB:
# -- optionally define a node selector for this zone
nodeSelector: null
# -- optionally define extra affinity rules, by default different zones are not allowed to schedule on the same host
extraAffinity: {}
zoneC:
# -- optionally define a node selector for this zone
nodeSelector: null
# -- optionally define extra affinity rules, by default different zones are not allowed to schedule on the same host
extraAffinity: {}
# -- The migration block allows migrating non zone aware ingesters to zone aware ingesters.
migration:
enabled: false
excludeDefaultZone: false
Expand Down Expand Up @@ -2292,6 +2303,33 @@ ruler:
# Subchart configurations
#
######################################################################################################################
# -- Settings for the Grafana Rollout Operator subchart https://github.com/grafana/helm-charts/tree/main/charts/rollout-operator
rollout_operator:
  enabled: true

  # -- podSecurityContext is the pod security context for the rollout operator.
  # When installing on OpenShift, override the podSecurityContext settings with:
  #
  # rollout_operator:
  #   podSecurityContext:
  #     fsGroup: null
  #     runAsGroup: null
  #     runAsUser: null
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 10001
    runAsGroup: 10001
    fsGroup: 10001
    seccompProfile:
      type: RuntimeDefault

  # Container security context for the rollout operator.
  securityContext:
    allowPrivilegeEscalation: false
    readOnlyRootFilesystem: true
    capabilities:
      drop:
        - ALL

# -- Configuration for the minio subchart
minio:
enabled: false
Expand Down

0 comments on commit 86888d4

Please sign in to comment.