diff --git a/bundles/rke2/uds-bundle.yaml b/bundles/rke2/uds-bundle.yaml index fe4024c8..a2ef0cc6 100644 --- a/bundles/rke2/uds-bundle.yaml +++ b/bundles/rke2/uds-bundle.yaml @@ -464,13 +464,29 @@ packages: value: *gitlab-init-security-context - path: gitlab.gitlab-exporter.init.containerSecurityContext value: *gitlab-init-security-context - - path: gitlab.gitaly.init.containerSecurityContext - value: *gitlab-init-security-context - path: global.redis.host value: "valkey-master.valkey.svc.cluster.local" - path: gitlab.gitlab-shell.enabled value: true - + # See gitlab's additional manifests, this priority class prevents gitaly from being evicted due to node pressure. + - path: gitlab.gitaly.priorityClassName + value: "gitlab-gitaly" + - path: gitlab.gitaly.securityContext.fsGroupChangePolicy # https://docs.gitlab.com/ee/administration/gitaly/kubernetes.html#persistent-volume-permissions + value: OnRootMismatch + - path: gitlab.gitaly.cgroups.initContainer.image.tag + value: v17.2.9 + - path: gitlab.gitaly.cgroups.initContainer.securityContext # These should be the default https://gitlab.com/gitlab-org/charts/gitlab/-/blob/master/charts/gitlab/charts/gitaly/values.yaml?ref_type=heads#L56 + value: # but are overridden somewhere to 1000 so setting them back to 0 here. 
+ runAsUser: 0 + runAsGroup: 0 + privileged: true # privileged needs to be added; it was not in the upstream helm chart + - path: gitlab.gitaly.containerSecurityContext + value: + runAsUser: 1000 + allowPrivilegeEscalation: false + runAsNonRoot: true + capabilities: + drop: ["ALL"] variables: - name: MIGRATIONS_RESOURCES description: "Gitlab Migrations Resources" @@ -502,6 +518,10 @@ packages: - name: GITALY_RESOURCES description: "Gitlab Gitaly Resources" path: "gitlab.gitaly.resources" + - name: GITALY_CGROUPS # Set this per the docs, per the resources you've given to gitaly, and per your repo makeup - https://docs.gitlab.com/ee/administration/gitaly/kubernetes.html#constrain-git-processes-resource-usage + path: "gitlab.gitaly.cgroups" + default: + enabled: false - name: REGISTRY_REPLICAS description: "Gitlab Registry Min Replicas" path: "registry.hpa.minReplicas" diff --git a/config/uds-config.yaml b/config/uds-config.yaml index d1442dbf..602c88c8 100644 --- a/config/uds-config.yaml +++ b/config/uds-config.yaml @@ -149,6 +149,13 @@ variables: - "default" # Gitlab requires that the first shard always be named `default`: https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html?tab=Helm+chart+%28Kubernetes%29#gitlab-requires-a-default-repository-storage - shard1 - shard2 + # Set this with an eye toward the GITALY_RESOURCES above and the link below. + GITALY_CGROUPS: # https://docs.gitlab.com/ee/administration/gitaly/kubernetes.html#constrain-git-processes-resource-usage + enabled: true + # Total limit across all repository cgroups; excludes the Gitaly process itself + memoryBytes: 63350767616 # 59GiB + cpuShares: 1024 # This is a Linux cgroups setting: https://medium.com/@weidagang/linux-beyond-the-basics-cpu-management-with-cpu-and-cpuset-cgroups-a4853bc645f5#:~:text=cpu.shares%20(The%20Foundation)%3A,The%20default%20value%20is%201024.
+ cpuQuotaUs: 1400000 # 14 cores REGISTRY_REPLICAS: 2 SHELL_REPLICAS: 2 sonarqube: diff --git a/docs/gitlab.md b/docs/gitlab.md index 1a51fd8d..af301758 100644 --- a/docs/gitlab.md +++ b/docs/gitlab.md @@ -17,3 +17,7 @@ You can confirm newly created repos are going to different shards by viewing the ![image](screenshots/ig-proj1-shard2.png) ![image](screenshots/joe-proj2-default.png) + +## Prevent Gitaly Shard OOM + +The UDS Config variable `GITALY_CGROUPS` allows administrators to improve the resiliency of each gitaly node. The proper values are heavily context-specific, so it is disabled by default. However, the value given to that variable will be substituted directly into the `gitlab.gitaly.cgroups` path [documented here](https://docs.gitlab.com/ee/administration/gitaly/kubernetes.html#constrain-git-processes-resource-usage). Adding these values will reduce the risk that a single git call will cause the gitaly node (shard) to be killed with an OOM error. diff --git a/packages/additional-manifests/gitlab/gitaly-priority-class.yaml b/packages/additional-manifests/gitlab/gitaly-priority-class.yaml new file mode 100644 index 00000000..6500cf65 --- /dev/null +++ b/packages/additional-manifests/gitlab/gitaly-priority-class.yaml @@ -0,0 +1,10 @@ +# Per the documentation, we are assigning gitaly a high priority so other pods are evicted under node-pressure BEFORE gitaly is.
+# On Priority Classes: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/#priorityclass +# The Gitaly Config docs: https://docs.gitlab.com/ee/administration/gitaly/kubernetes.html#use-priorityclass +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: gitlab-gitaly +value: 1000000 +globalDefault: false +description: "GitLab Gitaly priority class" diff --git a/packages/additional-manifests/pepr-policy-exemptions/gitlab-exemptions.yaml b/packages/additional-manifests/pepr-policy-exemptions/gitlab-exemptions.yaml index b2fca575..41d67cf5 100644 --- a/packages/additional-manifests/pepr-policy-exemptions/gitlab-exemptions.yaml +++ b/packages/additional-manifests/pepr-policy-exemptions/gitlab-exemptions.yaml @@ -17,3 +17,23 @@ spec: - `/etc/pki/ca-trust/extracted/openssl/` - `/etc/pki/ca-trust/extracted/java/` - `/etc/pki/ca-trust/extracted/edk2/`" + +--- +apiVersion: uds.dev/v1alpha1 +kind: Exemption +metadata: + name: gitaly-cgroups + namespace: uds-policy-exemptions +spec: + exemptions: + - policies: + - RestrictHostPathWrite + - RestrictVolumeTypes + - RequireNonRootUser + - DropAllCapabilities + - DisallowPrivileged + matcher: + namespace: gitlab + name: "gitlab-gitaly.*" + title: "gitlab gitaly exemptions" + description: "Gitaly is setting cgroup resource limitations to reduce the risk of a single large git call producing an OOM pod termination." 
diff --git a/packages/additional-manifests/zarf.yaml b/packages/additional-manifests/zarf.yaml index 1dbc282a..a8ed274d 100644 --- a/packages/additional-manifests/zarf.yaml +++ b/packages/additional-manifests/zarf.yaml @@ -19,6 +19,10 @@ components: namespace: gitlab files: - gitlab/envoy-filter.yaml + - gitlab/gitaly-priority-class.yaml + images: + - registry.gitlab.com/gitlab-org/build/cng/gitaly-init-cgroups:v17.2.9 # Remember to redeploy this package if changing stuff around this image in the gitlab package + - name: pepr-policy-exemptions required: true manifests: diff --git a/tasks.yaml b/tasks.yaml index 88dfdd25..b60cd292 100644 --- a/tasks.yaml +++ b/tasks.yaml @@ -31,7 +31,7 @@ tasks: ################ # Create ################ - - name: create-bundles + - name: create-bundle description: Create all UDS Bundles actions: - task: create-bundle-dependencies