diff --git a/cluster/manifests/event-logger/statefulset.yaml b/cluster/manifests/event-logger/statefulset.yaml index 4f555a966a..ec35469e6d 100644 --- a/cluster/manifests/event-logger/statefulset.yaml +++ b/cluster/manifests/event-logger/statefulset.yaml @@ -30,7 +30,7 @@ spec: serviceAccountName: kubernetes-event-logger containers: - name: logger - image: container-registry.zalando.net/teapot/event-logger:master-10 + image: container-registry.zalando.net/teapot/event-logger:master-11 args: - --snapshot-namespace=kube-system - --snapshot-name=kubernetes-event-logger diff --git a/cluster/manifests/flannel/daemonset.yaml b/cluster/manifests/flannel/daemonset.yaml index f5d3eb6a5a..42fa562358 100644 --- a/cluster/manifests/flannel/daemonset.yaml +++ b/cluster/manifests/flannel/daemonset.yaml @@ -57,7 +57,7 @@ spec: failureThreshold: 30 periodSeconds: 10 - name: kube-flannel - image: container-registry.zalando.net/teapot/flannel:v0.24.3-master-21 + image: container-registry.zalando.net/teapot/flannel:v0.24.4-master-22 command: - /opt/bin/flanneld args: diff --git a/cluster/manifests/kube-metrics-adapter/deployment.yaml b/cluster/manifests/kube-metrics-adapter/deployment.yaml index 73bbb5786d..366d613cab 100644 --- a/cluster/manifests/kube-metrics-adapter/deployment.yaml +++ b/cluster/manifests/kube-metrics-adapter/deployment.yaml @@ -27,7 +27,7 @@ spec: serviceAccountName: custom-metrics-apiserver containers: - name: kube-metrics-adapter - image: container-registry.zalando.net/teapot/kube-metrics-adapter:v0.2.2-11-g91acacb + image: container-registry.zalando.net/teapot/kube-metrics-adapter:v0.2.2-17-gc973a9e env: - name: AWS_REGION value: {{ .Cluster.Region }} diff --git a/cluster/manifests/kube-static-egress-controller/deployment.yaml b/cluster/manifests/kube-static-egress-controller/deployment.yaml index 2a756624d6..02e5d7e5af 100644 --- a/cluster/manifests/kube-static-egress-controller/deployment.yaml +++ 
b/cluster/manifests/kube-static-egress-controller/deployment.yaml @@ -30,7 +30,7 @@ spec: serviceAccountName: kube-static-egress-controller containers: - name: controller - image: container-registry.zalando.net/teapot/kube-static-egress-controller:v0.2.13-master-44 + image: container-registry.zalando.net/teapot/kube-static-egress-controller:v0.2.14-master-45 args: - "--provider=aws" - "--vpc-id={{.Cluster.ConfigItems.vpc_id}}" diff --git a/cluster/manifests/kubernetes-lifecycle-metrics/deployment.yaml b/cluster/manifests/kubernetes-lifecycle-metrics/deployment.yaml index ac665bafab..5dbed63ffe 100644 --- a/cluster/manifests/kubernetes-lifecycle-metrics/deployment.yaml +++ b/cluster/manifests/kubernetes-lifecycle-metrics/deployment.yaml @@ -31,7 +31,7 @@ spec: serviceAccountName: kubernetes-lifecycle-metrics containers: - name: kubernetes-lifecycle-metrics - image: "container-registry.zalando.net/teapot/kubernetes-lifecycle-metrics:master-14" + image: "container-registry.zalando.net/teapot/kubernetes-lifecycle-metrics:master-16" ports: - containerPort: 9090 protocol: TCP diff --git a/cluster/manifests/pdb-controller/deployment.yaml b/cluster/manifests/pdb-controller/deployment.yaml index 967a99f94f..845dabc822 100644 --- a/cluster/manifests/pdb-controller/deployment.yaml +++ b/cluster/manifests/pdb-controller/deployment.yaml @@ -27,7 +27,7 @@ spec: serviceAccountName: pdb-controller containers: - name: pdb-controller - image: container-registry.zalando.net/teapot/pdb-controller:master-29 + image: container-registry.zalando.net/teapot/pdb-controller:master-30 args: - --debug {{- if .Cluster.ConfigItems.pdb_controller_non_ready_ttl }} diff --git a/cluster/manifests/prometheus/statefulset.yaml b/cluster/manifests/prometheus/statefulset.yaml index b8cb8070fc..a3b2d40d33 100644 --- a/cluster/manifests/prometheus/statefulset.yaml +++ b/cluster/manifests/prometheus/statefulset.yaml @@ -35,7 +35,7 @@ spec: value: "1" initContainers: - name: generate-config - image: 
container-registry.zalando.net/library/alpine-3:3-20240226 + image: container-registry.zalando.net/library/alpine-3:3-20240325 command: - /bin/sh args: @@ -57,7 +57,7 @@ spec: mountPath: /prometheus containers: - name: prometheus - image: container-registry.zalando.net/teapot/prometheus:v2.50.1-master-53 + image: container-registry.zalando.net/teapot/prometheus:v2.51.0-master-54 args: - "--config.file=/prometheus/prometheus.yaml" - "--storage.tsdb.path=/prometheus/" diff --git a/cluster/manifests/skipper/deployment.yaml b/cluster/manifests/skipper/deployment.yaml index 891f321ccd..bc446c5355 100644 --- a/cluster/manifests/skipper/deployment.yaml +++ b/cluster/manifests/skipper/deployment.yaml @@ -1,5 +1,5 @@ {{ $internal_version := "v0.21.25-853" }} -{{ $canary_internal_version := "v0.21.25-853" }} +{{ $canary_internal_version := "v0.21.36-864" }} {{/* Optional canary arguments separated by "[cf724afc]" to allow whitespaces, e.g. "-foo=has a whitespace[cf724afc]-baz=qux" */}} {{ $canary_args := "" }} diff --git a/cluster/manifests/z-karpenter/08-karpenter.sh_nodeclaims.yaml b/cluster/manifests/z-karpenter/08-karpenter.sh_nodeclaims.yaml index 385747d0a9..bf72949365 100644 --- a/cluster/manifests/z-karpenter/08-karpenter.sh_nodeclaims.yaml +++ b/cluster/manifests/z-karpenter/08-karpenter.sh_nodeclaims.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.14.0 name: nodeclaims.karpenter.sh spec: group: karpenter.sh @@ -51,10 +51,19 @@ spec: description: NodeClaim is the Schema for the NodeClaims API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -62,10 +71,15 @@ spec: description: NodeClaimSpec describes the desired state of the NodeClaim properties: kubelet: - description: Kubelet defines args to be used when configuring kubelet on provisioned nodes. They are a subset of the upstream types, recognizing not all options may be supported. Wherever possible, the types and names should reflect the upstream kubelet types. + description: |- + Kubelet defines args to be used when configuring kubelet on provisioned nodes. + They are a subset of the upstream types, recognizing not all options may be supported. + Wherever possible, the types and names should reflect the upstream kubelet types. properties: clusterDNS: - description: clusterDNS is a list of IP addresses for the cluster DNS server. Note that not all providers may use all addresses. + description: |- + clusterDNS is a list of IP addresses for the cluster DNS server. 
+ Note that not all providers may use all addresses. items: type: string type: array @@ -82,7 +96,9 @@ spec: - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) evictionMaxPodGracePeriod: - description: EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to soft eviction thresholds being met. + description: |- + EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in + response to soft eviction thresholds being met. format: int32 type: integer evictionSoft: @@ -103,13 +119,22 @@ spec: - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) imageGCHighThresholdPercent: - description: ImageGCHighThresholdPercent is the percent of disk usage after which image garbage collection is always run. The percent is calculated by dividing this field value by 100, so this field must be between 0 and 100, inclusive. When specified, the value must be greater than ImageGCLowThresholdPercent. + description: |- + ImageGCHighThresholdPercent is the percent of disk usage after which image + garbage collection is always run. The percent is calculated by dividing this + field value by 100, so this field must be between 0 and 100, inclusive. + When specified, the value must be greater than ImageGCLowThresholdPercent. 
format: int32 maximum: 100 minimum: 0 type: integer imageGCLowThresholdPercent: - description: ImageGCLowThresholdPercent is the percent of disk usage before which image garbage collection is never run. Lowest disk usage to garbage collect to. The percent is calculated by dividing this field value by 100, so the field value must be between 0 and 100, inclusive. When specified, the value must be less than imageGCHighThresholdPercent + description: |- + ImageGCLowThresholdPercent is the percent of disk usage before which image + garbage collection is never run. Lowest disk usage to garbage collect to. + The percent is calculated by dividing this field value by 100, + so the field value must be between 0 and 100, inclusive. + When specified, the value must be less than imageGCHighThresholdPercent format: int32 maximum: 100 minimum: 0 @@ -129,12 +154,17 @@ spec: - message: kubeReserved value cannot be a negative resource quantity rule: self.all(x, !self[x].startsWith('-')) maxPods: - description: MaxPods is an override for the maximum number of pods that can run on a worker node instance. + description: |- + MaxPods is an override for the maximum number of pods that can run on + a worker node instance. format: int32 minimum: 0 type: integer podsPerCore: - description: PodsPerCore is an override for the number of pods that can run on a worker node instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if MaxPods is a lower value, that value will be used. + description: |- + PodsPerCore is an override for the number of pods that can run on a worker node + instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if + MaxPods is a lower value, that value will be used. format: int32 minimum: 0 type: integer @@ -178,7 +208,9 @@ spec: requirements: description: Requirements are layered with GetLabels and applied to every node. 
items: - description: A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + description: |- + A node selector requirement with min values is a selector that contains values, a key, an operator that relates the key and values + and minValues that represent the requirement to have at least that many values. properties: key: description: The label key that the selector applies to. @@ -196,8 +228,17 @@ spec: rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + minValues: + description: |- + This field is ALPHA and can be dropped or replaced at any time + MinValues is the minimum number of unique values required to define the flexibility of the specific requirement. + maximum: 50 + minimum: 1 + type: integer operator: - description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. 
type: string enum: - In @@ -207,7 +248,12 @@ spec: - Gt - Lt values: - description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. items: type: string type: array @@ -224,6 +270,8 @@ spec: rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)' - message: requirements operator 'Gt' or 'Lt' must have a single positive integer value rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)' + - message: requirements with 'minValues' must have at least that many values specified in the 'values' field + rule: 'self.all(x, (x.operator == ''In'' && has(x.minValues)) ? x.values.size() >= x.minValues : true)' resources: description: Resources models the resource requirements for the NodeClaim to launch properties: @@ -238,12 +286,21 @@ spec: type: object type: object startupTaints: - description: StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by daemonsets to allow initialization and enforce startup ordering. StartupTaints are ignored for provisioning purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. 
+ description: |- + StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically + within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by + daemonsets to allow initialization and enforce startup ordering. StartupTaints are ignored for provisioning + purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. items: - description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. properties: effect: - description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. type: string enum: - NoSchedule @@ -255,7 +312,9 @@ spec: minLength: 1 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ timeAdded: - description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. format: date-time type: string value: @@ -270,10 +329,15 @@ spec: taints: description: Taints will be applied to the NodeClaim's node. items: - description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. properties: effect: - description: Required. The effect of the taint on pods that do not tolerate the taint. 
Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. type: string enum: - NoSchedule @@ -285,7 +349,9 @@ spec: minLength: 1 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ timeAdded: - description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. format: date-time type: string value: @@ -325,10 +391,15 @@ spec: conditions: description: Conditions contains signals for health and readiness items: - description: 'Condition defines a readiness condition for a Knative resource. See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties' + description: |- + Condition defines a readiness condition for a Knative resource. + See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties properties: lastTransitionTime: - description: LastTransitionTime is the last time the condition transitioned from one status to another. We use VolatileTime in place of metav1.Time to exclude this from creating equality.Semantic differences (all other things held constant). + description: |- + LastTransitionTime is the last time the condition transitioned from one status to another. + We use VolatileTime in place of metav1.Time to exclude this from creating equality.Semantic + differences (all other things held constant). type: string message: description: A human readable message indicating details about the transition. @@ -337,7 +408,9 @@ spec: description: The reason for the condition's last transition. 
type: string severity: - description: Severity with which to treat failures of this type of condition. When this is not specified, it defaults to Error. + description: |- + Severity with which to treat failures of this type of condition. + When this is not specified, it defaults to Error. type: string status: description: Status of the condition, one of True, False, Unknown. @@ -360,6 +433,8 @@ spec: description: ProviderID of the corresponding node object type: string type: object + required: + - spec type: object served: true storage: true diff --git a/cluster/manifests/z-karpenter/09-karpenter.sh_nodepools.yaml b/cluster/manifests/z-karpenter/09-karpenter.sh_nodepools.yaml index 6ac7173d2c..f7f6165f50 100644 --- a/cluster/manifests/z-karpenter/09-karpenter.sh_nodepools.yaml +++ b/cluster/manifests/z-karpenter/09-karpenter.sh_nodepools.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.14.0 name: nodepools.karpenter.sh spec: group: karpenter.sh @@ -31,15 +31,28 @@ spec: description: NodePool is the Schema for the NodePools API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object spec: - description: NodePoolSpec is the top level nodepool specification. Nodepools launch nodes in response to pods that are unschedulable. A single nodepool is capable of managing a diverse set of nodes. Node properties are determined from a combination of nodepool and pod scheduling constraints. + description: |- + NodePoolSpec is the top level nodepool specification. Nodepools + launch nodes in response to pods that are unschedulable. A single nodepool + is capable of managing a diverse set of nodes. Node properties are determined + from a combination of nodepool and pod scheduling constraints. properties: disruption: default: @@ -47,20 +60,80 @@ spec: expireAfter: 720h description: Disruption contains the parameters that relate to Karpenter's disruption logic properties: + budgets: + default: + - nodes: 10% + description: |- + Budgets is a list of Budgets. + If there are multiple active budgets, Karpenter uses + the most restrictive value. If left undefined, + this will default to one budget with a value to 10%. + items: + description: |- + Budget defines when Karpenter will restrict the + number of Node Claims that can be terminating simultaneously. + properties: + duration: + description: |- + Duration determines how long a Budget is active since each Schedule hit. + Only minutes and hours are accepted, as cron does not work in seconds. + If omitted, the budget is always active. 
+ This is required if Schedule is set. + This regex has an optional 0s at the end since the duration.String() always adds + a 0s at the end. + pattern: ^([0-9]+(m|h)+(0s)?)$ + type: string + nodes: + default: 10% + description: |- + Nodes dictates the maximum number of NodeClaims owned by this NodePool + that can be terminating at once. This is calculated by counting nodes that + have a deletion timestamp set, or are actively being deleted by Karpenter. + This field is required when specifying a budget. + This cannot be of type intstr.IntOrString since kubebuilder doesn't support pattern + checking for int nodes for IntOrString nodes. + Ref: https://github.com/kubernetes-sigs/controller-tools/blob/55efe4be40394a288216dab63156b0a64fb82929/pkg/crd/markers/validation.go#L379-L388 + pattern: ^((100|[0-9]{1,2})%|[0-9]+)$ + type: string + schedule: + description: |- + Schedule specifies when a budget begins being active, following + the upstream cronjob syntax. If omitted, the budget is always active. + Timezones are not supported. + This field is required if Duration is set. + pattern: ^(@(annually|yearly|monthly|weekly|daily|midnight|hourly))|((.+)\s(.+)\s(.+)\s(.+)\s(.+))$ + type: string + required: + - nodes + type: object + maxItems: 50 + type: array + x-kubernetes-validations: + - message: '''schedule'' must be set with ''duration''' + rule: self.all(x, has(x.schedule) == has(x.duration)) consolidateAfter: - description: ConsolidateAfter is the duration the controller will wait before attempting to terminate nodes that are underutilized. Refer to ConsolidationPolicy for how underutilization is considered. + description: |- + ConsolidateAfter is the duration the controller will wait + before attempting to terminate nodes that are underutilized. + Refer to ConsolidationPolicy for how underutilization is considered. 
pattern: ^(([0-9]+(s|m|h))+)|(Never)$ type: string consolidationPolicy: default: WhenUnderutilized - description: ConsolidationPolicy describes which nodes Karpenter can disrupt through its consolidation algorithm. This policy defaults to "WhenUnderutilized" if not specified + description: |- + ConsolidationPolicy describes which nodes Karpenter can disrupt through its consolidation + algorithm. This policy defaults to "WhenUnderutilized" if not specified enum: - WhenEmpty - WhenUnderutilized type: string expireAfter: default: 720h - description: ExpireAfter is the duration the controller will wait before terminating a node, measured from when the node is created. This is useful to implement features like eventually consistent node upgrade, memory leak protection, and disruption testing. + description: |- + ExpireAfter is the duration the controller will wait + before terminating a node, measured from when the node is created. This + is useful to implement features like eventually consistent node upgrade, + memory leak protection, and disruption testing. pattern: ^(([0-9]+(s|m|h))+)|(Never)$ type: string type: object @@ -79,21 +152,31 @@ spec: description: Limits define a set of bounds for provisioning capacity. type: object template: - description: Template contains the template of possibilities for the provisioning logic to launch a NodeClaim with. NodeClaims launched from this NodePool will often be further constrained than the template specifies. + description: |- + Template contains the template of possibilities for the provisioning logic to launch a NodeClaim with. + NodeClaims launched from this NodePool will often be further constrained than the template specifies. properties: metadata: properties: annotations: additionalProperties: type: string - description: 'Annotations is an unstructured key value map stored with a resource that may be set by external tools to store and retrieve arbitrary metadata. 
They are not queryable and should be preserved when modifying objects. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations' + description: |- + Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations type: object labels: additionalProperties: type: string maxLength: 63 pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ - description: 'Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels' + description: |- + Map of string keys and values that can be used to organize and categorize + (scope and select) objects. May match selectors of replication controllers + and services. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels type: object maxProperties: 100 x-kubernetes-validations: @@ -114,10 +197,15 @@ spec: description: NodeClaimSpec describes the desired state of the NodeClaim properties: kubelet: - description: Kubelet defines args to be used when configuring kubelet on provisioned nodes. They are a subset of the upstream types, recognizing not all options may be supported. Wherever possible, the types and names should reflect the upstream kubelet types. + description: |- + Kubelet defines args to be used when configuring kubelet on provisioned nodes. + They are a subset of the upstream types, recognizing not all options may be supported. + Wherever possible, the types and names should reflect the upstream kubelet types. properties: clusterDNS: - description: clusterDNS is a list of IP addresses for the cluster DNS server. 
Note that not all providers may use all addresses. + description: |- + clusterDNS is a list of IP addresses for the cluster DNS server. + Note that not all providers may use all addresses. items: type: string type: array @@ -134,7 +222,9 @@ spec: - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) evictionMaxPodGracePeriod: - description: EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to soft eviction thresholds being met. + description: |- + EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in + response to soft eviction thresholds being met. format: int32 type: integer evictionSoft: @@ -155,13 +245,22 @@ spec: - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) imageGCHighThresholdPercent: - description: ImageGCHighThresholdPercent is the percent of disk usage after which image garbage collection is always run. The percent is calculated by dividing this field value by 100, so this field must be between 0 and 100, inclusive. When specified, the value must be greater than ImageGCLowThresholdPercent. + description: |- + ImageGCHighThresholdPercent is the percent of disk usage after which image + garbage collection is always run. The percent is calculated by dividing this + field value by 100, so this field must be between 0 and 100, inclusive. + When specified, the value must be greater than ImageGCLowThresholdPercent. 
format: int32 maximum: 100 minimum: 0 type: integer imageGCLowThresholdPercent: - description: ImageGCLowThresholdPercent is the percent of disk usage before which image garbage collection is never run. Lowest disk usage to garbage collect to. The percent is calculated by dividing this field value by 100, so the field value must be between 0 and 100, inclusive. When specified, the value must be less than imageGCHighThresholdPercent + description: |- + ImageGCLowThresholdPercent is the percent of disk usage before which image + garbage collection is never run. Lowest disk usage to garbage collect to. + The percent is calculated by dividing this field value by 100, + so the field value must be between 0 and 100, inclusive. + When specified, the value must be less than imageGCHighThresholdPercent format: int32 maximum: 100 minimum: 0 @@ -181,12 +280,17 @@ spec: - message: kubeReserved value cannot be a negative resource quantity rule: self.all(x, !self[x].startsWith('-')) maxPods: - description: MaxPods is an override for the maximum number of pods that can run on a worker node instance. + description: |- + MaxPods is an override for the maximum number of pods that can run on + a worker node instance. format: int32 minimum: 0 type: integer podsPerCore: - description: PodsPerCore is an override for the number of pods that can run on a worker node instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if MaxPods is a lower value, that value will be used. + description: |- + PodsPerCore is an override for the number of pods that can run on a worker node + instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if + MaxPods is a lower value, that value will be used. format: int32 minimum: 0 type: integer @@ -230,7 +334,9 @@ spec: requirements: description: Requirements are layered with GetLabels and applied to every node. 
items: - description: A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + description: |- + A node selector requirement with min values is a selector that contains values, a key, an operator that relates the key and values + and minValues that represent the requirement to have at least that many values. properties: key: description: The label key that the selector applies to. @@ -250,8 +356,17 @@ spec: rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + minValues: + description: |- + This field is ALPHA and can be dropped or replaced at any time + MinValues is the minimum number of unique values required to define the flexibility of the specific requirement. + maximum: 50 + minimum: 1 + type: integer operator: - description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. 
type: string enum: - In @@ -261,7 +376,12 @@ spec: - Gt - Lt values: - description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. items: type: string type: array @@ -278,6 +398,8 @@ spec: rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)' - message: requirements operator 'Gt' or 'Lt' must have a single positive integer value rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)' + - message: requirements with 'minValues' must have at least that many values specified in the 'values' field + rule: 'self.all(x, (x.operator == ''In'' && has(x.minValues)) ? x.values.size() >= x.minValues : true)' resources: description: Resources models the resource requirements for the NodeClaim to launch properties: @@ -291,13 +413,23 @@ spec: description: Requests describes the minimum required resources for the NodeClaim to launch type: object type: object + maxProperties: 0 startupTaints: - description: StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by daemonsets to allow initialization and enforce startup ordering. 
StartupTaints are ignored for provisioning purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. + description: |- + StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically + within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by + daemonsets to allow initialization and enforce startup ordering. StartupTaints are ignored for provisioning + purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. items: - description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. properties: effect: - description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. type: string enum: - NoSchedule @@ -309,7 +441,9 @@ spec: minLength: 1 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ timeAdded: - description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. format: date-time type: string value: @@ -324,10 +458,15 @@ spec: taints: description: Taints will be applied to the NodeClaim's node. items: - description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. 
+ description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. properties: effect: - description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. type: string enum: - NoSchedule @@ -339,7 +478,9 @@ spec: minLength: 1 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ timeAdded: - description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. format: date-time type: string value: @@ -359,7 +500,11 @@ spec: - spec type: object weight: - description: Weight is the priority given to the nodepool during scheduling. A higher numerical weight indicates that this nodepool will be ordered ahead of other nodepools with lower weights. A nodepool with no weight will be treated as if it is a nodepool with a weight of 0. + description: |- + Weight is the priority given to the nodepool during scheduling. A higher + numerical weight indicates that this nodepool will be ordered + ahead of other nodepools with lower weights. A nodepool with no weight + will be treated as if it is a nodepool with a weight of 0. format: int32 maximum: 100 minimum: 1 @@ -380,6 +525,8 @@ spec: description: Resources is the list of resources that have been provisioned. 
type: object type: object + required: + - spec type: object served: true storage: true diff --git a/cluster/manifests/z-karpenter/deployment.yaml b/cluster/manifests/z-karpenter/deployment.yaml index c56287b34d..44bb5bd4ec 100644 --- a/cluster/manifests/z-karpenter/deployment.yaml +++ b/cluster/manifests/z-karpenter/deployment.yaml @@ -33,7 +33,9 @@ spec: spec: dnsPolicy: Default serviceAccountName: karpenter + securityContext: + fsGroup: 65536 priorityClassName: "{{ .Cluster.ConfigItems.system_priority_class }}" containers: - name: controller securityContext: @@ -47,53 +49,53 @@ spec: drop: - ALL readOnlyRootFilesystem: true - image: "container-registry.zalando.net/teapot/karpenter:v0.34.1-main-18.custom" + image: "container-registry.zalando.net/teapot/karpenter:0.35.2-main-19.custom" imagePullPolicy: IfNotPresent env: - - name: ASSUME_ROLE_DURATION - value: 15m + - name: KUBERNETES_MIN_VERSION + value: "1.22.0-0" - name: AWS_REGION value: "{{ .Cluster.Region }}" - - name: BATCH_IDLE_DURATION - value: 1s - - name: BATCH_MAX_DURATION - value: 10s - name: CLUSTER_ENDPOINT value: https://kubernetes.default.svc.cluster.local.
- - name: CLUSTER_NAME - value: "{{.Cluster.ID}}" - name: ENABLE_PROFILING value: 'false' - - name: FEATURE_GATES - value: Drift=false - - name: HEALTH_PROBE_PORT - value: '8081' - name: INTERRUPTION_QUEUE value: "{{.Cluster.LocalID}}-karpenter-interruption-queue" - name: ISOLATED_VPC value: "false" - name: KARPENTER_SERVICE value: karpenter - - name: KUBERNETES_MIN_VERSION - value: 1.22.0-0 - name: LOG_LEVEL value: {{ .Cluster.ConfigItems.karpenter_log_level }} - - name: MEMORY_LIMIT - valueFrom: - resourceFieldRef: - containerName: controller - divisor: '0' - resource: limits.memory - name: METRICS_PORT - value: '8000' - - name: RESERVED_ENIS - value: '0' + value: "8000" + - name: HEALTH_PROBE_PORT + value: "8081" - name: SYSTEM_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace + - name: MEMORY_LIMIT + valueFrom: + resourceFieldRef: + containerName: controller + divisor: "0" + resource: limits.memory + - name: FEATURE_GATES + value: "Drift=false,SpotToSpotConsolidation=true" + - name: BATCH_MAX_DURATION + value: "10s" + - name: BATCH_IDLE_DURATION + value: "1s" + - name: ASSUME_ROLE_DURATION + value: "15m" + - name: CLUSTER_NAME + value: "{{.Cluster.ID}}" - name: VM_MEMORY_OVERHEAD_PERCENT - value: '0.075' + value: "0.075" + - name: RESERVED_ENIS + value: "0" ports: - name: http-metrics containerPort: 8000 diff --git a/cluster/node-pools/master-default/userdata.yaml b/cluster/node-pools/master-default/userdata.yaml index 683044ec56..1f849269ba 100644 --- a/cluster/node-pools/master-default/userdata.yaml +++ b/cluster/node-pools/master-default/userdata.yaml @@ -273,7 +273,7 @@ write_files: - mountPath: /etc/kubernetes/ssl name: ssl-certs-kubernetes readOnly: true - - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/k8s-authnz-webhook:master-127 + - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/k8s-authnz-webhook:master-128 name: webhook ports: - containerPort: 8081