diff --git a/Makefile b/Makefile index cefb261439ae..fe8d7d12d081 100644 --- a/Makefile +++ b/Makefile @@ -161,7 +161,7 @@ delete: ## Delete the controller from your ~/.kube/config cluster helm uninstall karpenter --namespace karpenter docgen: ## Generate docs - $(WITH_GOFLAGS) ./hack/docgen.sh + KARPENTER_CORE_DIR=$(KARPENTER_CORE_DIR) $(WITH_GOFLAGS) ./hack/docgen.sh codegen: ## Auto generate files based on AWS APIs response $(WITH_GOFLAGS) ./hack/codegen.sh diff --git a/hack/docgen.sh b/hack/docgen.sh index f1fb0116df47..3bdb1c5a98ec 100755 --- a/hack/docgen.sh +++ b/hack/docgen.sh @@ -3,12 +3,12 @@ set -euo pipefail compatibilitymatrix() { go run hack/docs/version_compatibility.go hack/docs/compatibility-karpenter.yaml "$(git describe --exact-match --tags || echo "no tag")" - go run hack/docs/compatibilitymetrix_gen_docs.go website/content/en/preview/upgrade-guide.md hack/docs/compatibility-karpenter.yaml + go run hack/docs/compatibilitymetrix_gen_docs.go website/content/en/preview/upgrade-guide.md hack/docs/compatibility-karpenter.yaml 6 } compatibilitymatrix -go run hack/docs/metrics_gen_docs.go pkg/ $(KARPENTER_CORE_DIR)/pkg website/content/en/preview/concepts/metrics.md +go run hack/docs/metrics_gen_docs.go pkg/ ${KARPENTER_CORE_DIR}/pkg website/content/en/preview/concepts/metrics.md go run hack/docs/instancetypes_gen_docs.go website/content/en/preview/concepts/instance-types.md go run hack/docs/configuration_gen_docs.go website/content/en/preview/concepts/settings.md cd charts/karpenter && helm-docs \ No newline at end of file diff --git a/hack/docs/compatibility-karpenter.yaml b/hack/docs/compatibility-karpenter.yaml index fedbba2be9e5..5450544848ec 100644 --- a/hack/docs/compatibility-karpenter.yaml +++ b/hack/docs/compatibility-karpenter.yaml @@ -29,4 +29,7 @@ compatibility: maxK8sVersion: 1.27 - appVersion: 0.30.x minK8sVersion: 1.23 - maxK8sVersion: 1.27 \ No newline at end of file + maxK8sVersion: 1.27 + - appVersion: 0.31.x + minK8sVersion: 1.23 + 
maxK8sVersion: 1.28 \ No newline at end of file diff --git a/hack/docs/compatibilitymetrix_gen_docs.go b/hack/docs/compatibilitymetrix_gen_docs.go index 7f97ec4dbdbe..5f81c8f90020 100644 --- a/hack/docs/compatibilitymetrix_gen_docs.go +++ b/hack/docs/compatibilitymetrix_gen_docs.go @@ -18,6 +18,7 @@ import ( "fmt" "log" "os" + "strconv" "strings" "github.com/aws/karpenter/tools/kompat/pkg/kompat" @@ -54,5 +55,6 @@ func main() { if err != nil { log.Fatalf("unable to open %s to write generated output: %v", outputFileName, err) } - f.WriteString(topDoc + baseText.Markdown(kompat.Options{LastN: 5}) + bottomDoc) + numOfk8sVersion, _ := strconv.Atoi(os.Args[3]) + f.WriteString(topDoc + baseText.Markdown(kompat.Options{LastN: numOfk8sVersion}) + bottomDoc) } diff --git a/website/content/en/preview/concepts/instance-types.md b/website/content/en/preview/concepts/instance-types.md index 406f2d07eeb0..95752c33498f 100644 --- a/website/content/en/preview/concepts/instance-types.md +++ b/website/content/en/preview/concepts/instance-types.md @@ -4401,38 +4401,6 @@ below are the resources available with some assumptions and after the instance o |memory|180528Mi| |pods|89| |vpc.amazonaws.com/pod-eni|119| -## dl1 Family -### `dl1.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|dl| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|dl1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|habana| - |karpenter.k8s.aws/instance-gpu-memory|32768| - |karpenter.k8s.aws/instance-gpu-name|gaudi-hl-205| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|4000| - |karpenter.k8s.aws/instance-memory|786432| - |karpenter.k8s.aws/instance-network-bandwidth|400000| - |karpenter.k8s.aws/instance-pods|737| - 
|karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|dl1.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |habana.ai/gaudi|8| - |memory|718987Mi| - |pods|737| - |vpc.amazonaws.com/pod-eni|62| ## f1 Family ### `f1.2xlarge` #### Labels @@ -5584,79 +5552,6 @@ below are the resources available with some assumptions and after the instance o |ephemeral-storage|17Gi| |memory|234021Mi| |pods|737| -## hpc7g Family -### `hpc7g.4xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|16| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|4xlarge| - |kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.4xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|15890m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| -### `hpc7g.8xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|32| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|8xlarge| - |kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.8xlarge| -#### Resources - | Resource | Quantity 
| - |--|--| - |cpu|31850m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| -### `hpc7g.16xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|64| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|16xlarge| - |kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.16xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|63770m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| ## i2 Family ### `i2.xlarge` #### Labels @@ -11891,192 +11786,6 @@ below are the resources available with some assumptions and after the instance o |memory|690421Mi| |nvidia.com/gpu|16| |pods|234| -## p3 Family -### `p3.2xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|8| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|1| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|62464| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|2xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.2xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|7910m| - |ephemeral-storage|17Gi| - |memory|56786Mi| - |nvidia.com/gpu|1| - |pods|58| - 
|vpc.amazonaws.com/pod-eni|38| -### `p3.8xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|32| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|4| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|249856| - |karpenter.k8s.aws/instance-network-bandwidth|10000| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|8xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.8xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|31850m| - |ephemeral-storage|17Gi| - |memory|228187Mi| - |nvidia.com/gpu|4| - |pods|234| - |vpc.amazonaws.com/pod-eni|54| -### `p3.16xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|64| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|499712| - |karpenter.k8s.aws/instance-network-bandwidth|25000| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|16xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.16xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|63770m| - |ephemeral-storage|17Gi| - |memory|459304Mi| - |nvidia.com/gpu|8| - 
|pods|234| - |vpc.amazonaws.com/pod-eni|114| -## p3dn Family -### `p3dn.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p3dn| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|32768| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|1800| - |karpenter.k8s.aws/instance-memory|786432| - |karpenter.k8s.aws/instance-network-bandwidth|100000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3dn.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|718987Mi| - |nvidia.com/gpu|8| - |pods|737| - |vpc.amazonaws.com/pod-eni|107| -## p4d Family -### `p4d.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p4d| - |karpenter.k8s.aws/instance-generation|4| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|40960| - |karpenter.k8s.aws/instance-gpu-name|a100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|8000| - |karpenter.k8s.aws/instance-memory|1179648| - |karpenter.k8s.aws/instance-network-bandwidth|400000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - 
|node.kubernetes.io/instance-type|p4d.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|1082712Mi| - |nvidia.com/gpu|8| - |pods|737| - |vpc.amazonaws.com/pod-eni|62| -## p5 Family -### `p5.48xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|192| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p5| - |karpenter.k8s.aws/instance-generation|5| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|81920| - |karpenter.k8s.aws/instance-gpu-name|h100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|30400| - |karpenter.k8s.aws/instance-memory|2097152| - |karpenter.k8s.aws/instance-network-bandwidth|3200000| - |karpenter.k8s.aws/instance-pods|100| - |karpenter.k8s.aws/instance-size|48xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p5.48xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|191450m| - |ephemeral-storage|17Gi| - |memory|1938410Mi| - |nvidia.com/gpu|8| - |pods|100| - |vpc.amazonaws.com/pod-eni|120| ## r3 Family ### `r3.large` #### Labels @@ -17245,36 +16954,6 @@ below are the resources available with some assumptions and after the instance o |memory|29258Mi| |pods|58| ## trn1 Family -### `trn1.2xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-accelerator-count|1| - |karpenter.k8s.aws/instance-accelerator-manufacturer|aws| - |karpenter.k8s.aws/instance-accelerator-name|inferentia| - |karpenter.k8s.aws/instance-category|trn| - |karpenter.k8s.aws/instance-cpu|8| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|trn1| - |karpenter.k8s.aws/instance-generation|1| - 
|karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|474| - |karpenter.k8s.aws/instance-memory|32768| - |karpenter.k8s.aws/instance-network-bandwidth|3125| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|2xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|trn1.2xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |aws.amazon.com/neuron|1| - |cpu|7910m| - |ephemeral-storage|17Gi| - |memory|29317Mi| - |pods|58| - |vpc.amazonaws.com/pod-eni|17| ### `trn1.32xlarge` #### Labels | Label | Value | @@ -17305,37 +16984,6 @@ below are the resources available with some assumptions and after the instance o |memory|481894Mi| |pods|247| |vpc.amazonaws.com/pod-eni|82| -## trn1n Family -### `trn1n.32xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-accelerator-count|16| - |karpenter.k8s.aws/instance-accelerator-manufacturer|aws| - |karpenter.k8s.aws/instance-accelerator-name|inferentia| - |karpenter.k8s.aws/instance-category|trn| - |karpenter.k8s.aws/instance-cpu|128| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|trn1n| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|7600| - |karpenter.k8s.aws/instance-memory|524288| - |karpenter.k8s.aws/instance-network-bandwidth|1600000| - |karpenter.k8s.aws/instance-pods|247| - |karpenter.k8s.aws/instance-size|32xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|trn1n.32xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |aws.amazon.com/neuron|16| - |cpu|127610m| - |ephemeral-storage|17Gi| - |memory|481894Mi| - |pods|247| - |vpc.amazonaws.com/pod-eni|120| ## u-12tb1 Family ### `u-12tb1.112xlarge` #### Labels @@ -17511,82 +17159,6 @@ below are the resources available with some assumptions and after 
the instance o |ephemeral-storage|17Gi| |memory|8720933Mi| |pods|737| -## vt1 Family -### `vt1.3xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|12| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|24576| - |karpenter.k8s.aws/instance-network-bandwidth|3120| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|3xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.3xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|11900m| - |ephemeral-storage|17Gi| - |memory|21739Mi| - |pods|58| - |vpc.amazonaws.com/pod-eni|38| -### `vt1.6xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|24| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|49152| - |karpenter.k8s.aws/instance-network-bandwidth|6250| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|6xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.6xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|23870m| - |ephemeral-storage|17Gi| - |memory|42536Mi| - |pods|234| - |vpc.amazonaws.com/pod-eni|54| -### `vt1.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - 
|karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|196608| - |karpenter.k8s.aws/instance-network-bandwidth|25000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|173400Mi| - |pods|737| - |vpc.amazonaws.com/pod-eni|107| ## x1 Family ### `x1.16xlarge` #### Labels diff --git a/website/content/en/preview/concepts/metrics.md b/website/content/en/preview/concepts/metrics.md index 531e3d8c5b0f..112f63b0fe9c 100644 --- a/website/content/en/preview/concepts/metrics.md +++ b/website/content/en/preview/concepts/metrics.md @@ -8,6 +8,48 @@ description: > --- Karpenter makes several metrics available in Prometheus format to allow monitoring cluster provisioning status. These metrics are available by default at `karpenter.karpenter.svc.cluster.local:8000/metrics` configurable via the `METRICS_PORT` environment variable documented [here](../settings) +## Controller Runtime Metrics + +### `controller_runtime_active_workers` +Number of currently used workers per controller + +### `controller_runtime_max_concurrent_reconciles` +Maximum number of concurrent reconciles per controller + +### `controller_runtime_reconcile_errors_total` +Total number of reconciliation errors per controller + +### `controller_runtime_reconcile_time_seconds` +Length of time per reconciliation per controller + +### `controller_runtime_reconcile_total` +Total number of reconciliations per controller + +## Consistency Metrics + +### `karpenter_consistency_errors` +Number of consistency checks that have failed. + +## Deprovisioning Metrics + +### `karpenter_deprovisioning_actions_performed` +Number of deprovisioning actions performed. Labeled by deprovisioner. 
+ +### `karpenter_deprovisioning_consolidation_timeouts` +Number of times the Consolidation algorithm has reached a timeout. Labeled by consolidation type. + +### `karpenter_deprovisioning_eligible_machines` +Number of machines eligible for deprovisioning by Karpenter. Labeled by deprovisioner. + +### `karpenter_deprovisioning_evaluation_duration_seconds` +Duration of the deprovisioning evaluation process in seconds. + +### `karpenter_deprovisioning_replacement_machine_initialized_seconds` +Amount of time required for a replacement machine to become initialized. + +### `karpenter_deprovisioning_replacement_machine_launch_failure_counter` +The number of times that Karpenter failed to launch a replacement node for deprovisioning. Labeled by deprovisioner. + ## Interruption Metrics ### `karpenter_interruption_actions_performed` @@ -22,8 +64,94 @@ Length of time between message creation in queue and an action taken on the mess ### `karpenter_interruption_received_messages` Count of messages received from the SQS queue. Broken down by message type and whether the message was actionable. +## Machines Metrics + +### `karpenter_machines_created` +Number of machines created in total by Karpenter. Labeled by reason the machine was created and the owning provisioner. + +### `karpenter_machines_disrupted` +Number of machines disrupted in total by Karpenter. Labeled by disruption type of the machine and the owning provisioner. + +### `karpenter_machines_drifted` +Number of machine drifted reasons in total by Karpenter. Labeled by drift type of the machine and the owning provisioner. + +### `karpenter_machines_initialized` +Number of machines initialized in total by Karpenter. Labeled by the owning provisioner. + +### `karpenter_machines_launched` +Number of machines launched in total by Karpenter. Labeled by the owning provisioner. + +### `karpenter_machines_registered` +Number of machines registered in total by Karpenter. Labeled by the owning provisioner. 
+ +### `karpenter_machines_terminated` +Number of machines terminated in total by Karpenter. Labeled by reason the machine was terminated and the owning provisioner. + +## Provisioner Metrics + +### `karpenter_provisioner_limit` +The Provisioner Limits are the limits specified on the provisioner that restrict the quantity of resources provisioned. Labeled by provisioner name and resource type. + +### `karpenter_provisioner_scheduling_duration_seconds` +Duration of scheduling process in seconds. + +### `karpenter_provisioner_scheduling_simulation_duration_seconds` +Duration of scheduling simulations used for deprovisioning and provisioning in seconds. + +### `karpenter_provisioner_usage` +The Provisioner Usage is the amount of resources that have been provisioned by a particular provisioner. Labeled by provisioner name and resource type. + +### `karpenter_provisioner_usage_pct` +The Provisioner Usage Percentage is the percentage of each resource used based on the resources provisioned and the limits that have been configured in the range [0,100]. Labeled by provisioner name and resource type. + +## Nodes Metrics + +### `karpenter_nodes_allocatable` +Node allocatable are the resources allocatable by nodes. + +### `karpenter_nodes_created` +Number of nodes created in total by Karpenter. Labeled by owning provisioner. + +### `karpenter_nodes_leases_deleted` +Number of deleted leaked leases. + +### `karpenter_nodes_system_overhead` +Node system daemon overhead are the resources reserved for system overhead, the difference between the node's capacity and allocatable values are reported by the status. + +### `karpenter_nodes_terminated` +Number of nodes terminated in total by Karpenter. Labeled by owning provisioner. + +### `karpenter_nodes_termination_time_seconds` +The time taken between a node's deletion request and the removal of its finalizer + +### `karpenter_nodes_total_daemon_limits` +Node total daemon limits are the resources specified by DaemonSet pod limits. 
+ +### `karpenter_nodes_total_daemon_requests` +Node total daemon requests are the resource requested by DaemonSet pods bound to nodes. + +### `karpenter_nodes_total_pod_limits` +Node total pod limits are the resources specified by non-DaemonSet pod limits. + +### `karpenter_nodes_total_pod_requests` +Node total pod requests are the resources requested by non-DaemonSet pods bound to nodes. + +## Pods Metrics + +### `karpenter_pods_startup_time_seconds` +The time from pod creation until the pod is running. + +### `karpenter_pods_state` +Pod state is the current state of pods. This metric can be used several ways as it is labeled by the pod name, namespace, owner, node, provisioner name, zone, architecture, capacity type, instance type and pod phase. + ## Cloudprovider Metrics +### `karpenter_cloudprovider_duration_seconds` +Duration of cloud provider method calls. Labeled by the controller, method name and provider. + +### `karpenter_cloudprovider_errors_total` +Total number of errors returned from CloudProvider calls. + ### `karpenter_cloudprovider_instance_type_cpu_cores` VCPUs cores for a given instance type. 
diff --git a/website/content/en/preview/upgrade-guide.md b/website/content/en/preview/upgrade-guide.md index c3235a441b6a..088277a5ee4b 100644 --- a/website/content/en/preview/upgrade-guide.md +++ b/website/content/en/preview/upgrade-guide.md @@ -15,9 +15,9 @@ To make upgrading easier we aim to minimize introduction of breaking changes wit [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go) -| KUBERNETES | 1.24 | 1.25 | 1.26 | 1.27 | 1.28 | -|------------|---------|---------|---------|---------|--------| -| karpenter | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.0 | +| KUBERNETES | 1.23 | 1.24 | 1.25 | 1.26 | 1.27 | 1.28 | +|------------|---------|---------|---------|---------|---------|--------| +| karpenter | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x | [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go) diff --git a/website/content/en/v0.31/concepts/instance-types.md b/website/content/en/v0.31/concepts/instance-types.md index 406f2d07eeb0..95752c33498f 100644 --- a/website/content/en/v0.31/concepts/instance-types.md +++ b/website/content/en/v0.31/concepts/instance-types.md @@ -4401,38 +4401,6 @@ below are the resources available with some assumptions and after the instance o |memory|180528Mi| |pods|89| |vpc.amazonaws.com/pod-eni|119| -## dl1 Family -### `dl1.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|dl| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|dl1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|habana| - |karpenter.k8s.aws/instance-gpu-memory|32768| - |karpenter.k8s.aws/instance-gpu-name|gaudi-hl-205| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|4000| - 
|karpenter.k8s.aws/instance-memory|786432| - |karpenter.k8s.aws/instance-network-bandwidth|400000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|dl1.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |habana.ai/gaudi|8| - |memory|718987Mi| - |pods|737| - |vpc.amazonaws.com/pod-eni|62| ## f1 Family ### `f1.2xlarge` #### Labels @@ -5584,79 +5552,6 @@ below are the resources available with some assumptions and after the instance o |ephemeral-storage|17Gi| |memory|234021Mi| |pods|737| -## hpc7g Family -### `hpc7g.4xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|16| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|4xlarge| - |kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.4xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|15890m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| -### `hpc7g.8xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|32| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|8xlarge| - 
|kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.8xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|31850m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| -### `hpc7g.16xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|hpc| - |karpenter.k8s.aws/instance-cpu|64| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|hpc7g| - |karpenter.k8s.aws/instance-generation|7| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|131072| - |karpenter.k8s.aws/instance-network-bandwidth|200000| - |karpenter.k8s.aws/instance-pods|198| - |karpenter.k8s.aws/instance-size|16xlarge| - |kubernetes.io/arch|arm64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|hpc7g.16xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|63770m| - |ephemeral-storage|17Gi| - |memory|118649Mi| - |pods|198| ## i2 Family ### `i2.xlarge` #### Labels @@ -11891,192 +11786,6 @@ below are the resources available with some assumptions and after the instance o |memory|690421Mi| |nvidia.com/gpu|16| |pods|234| -## p3 Family -### `p3.2xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|8| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|1| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|62464| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|2xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.2xlarge| -#### Resources - | 
Resource | Quantity | - |--|--| - |cpu|7910m| - |ephemeral-storage|17Gi| - |memory|56786Mi| - |nvidia.com/gpu|1| - |pods|58| - |vpc.amazonaws.com/pod-eni|38| -### `p3.8xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|32| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|4| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|249856| - |karpenter.k8s.aws/instance-network-bandwidth|10000| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|8xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.8xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|31850m| - |ephemeral-storage|17Gi| - |memory|228187Mi| - |nvidia.com/gpu|4| - |pods|234| - |vpc.amazonaws.com/pod-eni|54| -### `p3.16xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|64| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|false| - |karpenter.k8s.aws/instance-family|p3| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|16384| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|xen| - |karpenter.k8s.aws/instance-memory|499712| - |karpenter.k8s.aws/instance-network-bandwidth|25000| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|16xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3.16xlarge| -#### Resources - 
| Resource | Quantity | - |--|--| - |cpu|63770m| - |ephemeral-storage|17Gi| - |memory|459304Mi| - |nvidia.com/gpu|8| - |pods|234| - |vpc.amazonaws.com/pod-eni|114| -## p3dn Family -### `p3dn.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p3dn| - |karpenter.k8s.aws/instance-generation|3| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|32768| - |karpenter.k8s.aws/instance-gpu-name|v100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|1800| - |karpenter.k8s.aws/instance-memory|786432| - |karpenter.k8s.aws/instance-network-bandwidth|100000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p3dn.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|718987Mi| - |nvidia.com/gpu|8| - |pods|737| - |vpc.amazonaws.com/pod-eni|107| -## p4d Family -### `p4d.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p4d| - |karpenter.k8s.aws/instance-generation|4| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|40960| - |karpenter.k8s.aws/instance-gpu-name|a100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|8000| - |karpenter.k8s.aws/instance-memory|1179648| - |karpenter.k8s.aws/instance-network-bandwidth|400000| - |karpenter.k8s.aws/instance-pods|737| - 
|karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p4d.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|1082712Mi| - |nvidia.com/gpu|8| - |pods|737| - |vpc.amazonaws.com/pod-eni|62| -## p5 Family -### `p5.48xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|p| - |karpenter.k8s.aws/instance-cpu|192| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|p5| - |karpenter.k8s.aws/instance-generation|5| - |karpenter.k8s.aws/instance-gpu-count|8| - |karpenter.k8s.aws/instance-gpu-manufacturer|nvidia| - |karpenter.k8s.aws/instance-gpu-memory|81920| - |karpenter.k8s.aws/instance-gpu-name|h100| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|30400| - |karpenter.k8s.aws/instance-memory|2097152| - |karpenter.k8s.aws/instance-network-bandwidth|3200000| - |karpenter.k8s.aws/instance-pods|100| - |karpenter.k8s.aws/instance-size|48xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|p5.48xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|191450m| - |ephemeral-storage|17Gi| - |memory|1938410Mi| - |nvidia.com/gpu|8| - |pods|100| - |vpc.amazonaws.com/pod-eni|120| ## r3 Family ### `r3.large` #### Labels @@ -17245,36 +16954,6 @@ below are the resources available with some assumptions and after the instance o |memory|29258Mi| |pods|58| ## trn1 Family -### `trn1.2xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-accelerator-count|1| - |karpenter.k8s.aws/instance-accelerator-manufacturer|aws| - |karpenter.k8s.aws/instance-accelerator-name|inferentia| - |karpenter.k8s.aws/instance-category|trn| - |karpenter.k8s.aws/instance-cpu|8| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - 
|karpenter.k8s.aws/instance-family|trn1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|474| - |karpenter.k8s.aws/instance-memory|32768| - |karpenter.k8s.aws/instance-network-bandwidth|3125| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|2xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|trn1.2xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |aws.amazon.com/neuron|1| - |cpu|7910m| - |ephemeral-storage|17Gi| - |memory|29317Mi| - |pods|58| - |vpc.amazonaws.com/pod-eni|17| ### `trn1.32xlarge` #### Labels | Label | Value | @@ -17305,37 +16984,6 @@ below are the resources available with some assumptions and after the instance o |memory|481894Mi| |pods|247| |vpc.amazonaws.com/pod-eni|82| -## trn1n Family -### `trn1n.32xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-accelerator-count|16| - |karpenter.k8s.aws/instance-accelerator-manufacturer|aws| - |karpenter.k8s.aws/instance-accelerator-name|inferentia| - |karpenter.k8s.aws/instance-category|trn| - |karpenter.k8s.aws/instance-cpu|128| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|trn1n| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-local-nvme|7600| - |karpenter.k8s.aws/instance-memory|524288| - |karpenter.k8s.aws/instance-network-bandwidth|1600000| - |karpenter.k8s.aws/instance-pods|247| - |karpenter.k8s.aws/instance-size|32xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|trn1n.32xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |aws.amazon.com/neuron|16| - |cpu|127610m| - |ephemeral-storage|17Gi| - |memory|481894Mi| - |pods|247| - |vpc.amazonaws.com/pod-eni|120| ## u-12tb1 Family ### `u-12tb1.112xlarge` #### Labels @@ 
-17511,82 +17159,6 @@ below are the resources available with some assumptions and after the instance o |ephemeral-storage|17Gi| |memory|8720933Mi| |pods|737| -## vt1 Family -### `vt1.3xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|12| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|24576| - |karpenter.k8s.aws/instance-network-bandwidth|3120| - |karpenter.k8s.aws/instance-pods|58| - |karpenter.k8s.aws/instance-size|3xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.3xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|11900m| - |ephemeral-storage|17Gi| - |memory|21739Mi| - |pods|58| - |vpc.amazonaws.com/pod-eni|38| -### `vt1.6xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|24| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - |karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|49152| - |karpenter.k8s.aws/instance-network-bandwidth|6250| - |karpenter.k8s.aws/instance-pods|234| - |karpenter.k8s.aws/instance-size|6xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.6xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|23870m| - |ephemeral-storage|17Gi| - |memory|42536Mi| - |pods|234| - |vpc.amazonaws.com/pod-eni|54| -### `vt1.24xlarge` -#### Labels - | Label | Value | - |--|--| - |karpenter.k8s.aws/instance-category|vt| - |karpenter.k8s.aws/instance-cpu|96| - |karpenter.k8s.aws/instance-encryption-in-transit-supported|true| - 
|karpenter.k8s.aws/instance-family|vt1| - |karpenter.k8s.aws/instance-generation|1| - |karpenter.k8s.aws/instance-hypervisor|nitro| - |karpenter.k8s.aws/instance-memory|196608| - |karpenter.k8s.aws/instance-network-bandwidth|25000| - |karpenter.k8s.aws/instance-pods|737| - |karpenter.k8s.aws/instance-size|24xlarge| - |kubernetes.io/arch|amd64| - |kubernetes.io/os|linux| - |node.kubernetes.io/instance-type|vt1.24xlarge| -#### Resources - | Resource | Quantity | - |--|--| - |cpu|95690m| - |ephemeral-storage|17Gi| - |memory|173400Mi| - |pods|737| - |vpc.amazonaws.com/pod-eni|107| ## x1 Family ### `x1.16xlarge` #### Labels diff --git a/website/content/en/v0.31/concepts/metrics.md b/website/content/en/v0.31/concepts/metrics.md index 531e3d8c5b0f..112f63b0fe9c 100644 --- a/website/content/en/v0.31/concepts/metrics.md +++ b/website/content/en/v0.31/concepts/metrics.md @@ -8,6 +8,48 @@ description: > --- Karpenter makes several metrics available in Prometheus format to allow monitoring cluster provisioning status. These metrics are available by default at `karpenter.karpenter.svc.cluster.local:8000/metrics` configurable via the `METRICS_PORT` environment variable documented [here](../settings) +## Controller Runtime Metrics + +### `controller_runtime_active_workers` +Number of currently used workers per controller + +### `controller_runtime_max_concurrent_reconciles` +Maximum number of concurrent reconciles per controller + +### `controller_runtime_reconcile_errors_total` +Total number of reconciliation errors per controller + +### `controller_runtime_reconcile_time_seconds` +Length of time per reconciliation per controller + +### `controller_runtime_reconcile_total` +Total number of reconciliations per controller + +## Consistency Metrics + +### `karpenter_consistency_errors` +Number of consistency checks that have failed. + +## Deprovisioning Metrics + +### `karpenter_deprovisioning_actions_performed` +Number of deprovisioning actions performed. Labeled by deprovisioner. 
+ +### `karpenter_deprovisioning_consolidation_timeouts` +Number of times the Consolidation algorithm has reached a timeout. Labeled by consolidation type. + +### `karpenter_deprovisioning_eligible_machines` +Number of machines eligible for deprovisioning by Karpenter. Labeled by deprovisioner. + +### `karpenter_deprovisioning_evaluation_duration_seconds` +Duration of the deprovisioning evaluation process in seconds. + +### `karpenter_deprovisioning_replacement_machine_initialized_seconds` +Amount of time required for a replacement machine to become initialized. + +### `karpenter_deprovisioning_replacement_machine_launch_failure_counter` +The number of times that Karpenter failed to launch a replacement node for deprovisioning. Labeled by deprovisioner. + ## Interruption Metrics ### `karpenter_interruption_actions_performed` @@ -22,8 +64,94 @@ Length of time between message creation in queue and an action taken on the mess ### `karpenter_interruption_received_messages` Count of messages received from the SQS queue. Broken down by message type and whether the message was actionable. +## Machines Metrics + +### `karpenter_machines_created` +Number of machines created in total by Karpenter. Labeled by reason the machine was created and the owning provisioner. + +### `karpenter_machines_disrupted` +Number of machines disrupted in total by Karpenter. Labeled by disruption type of the machine and the owning provisioner. + +### `karpenter_machines_drifted` +Number of machine drifted reasons in total by Karpenter. Labeled by drift type of the machine and the owning provisioner. + +### `karpenter_machines_initialized` +Number of machines initialized in total by Karpenter. Labeled by the owning provisioner. + +### `karpenter_machines_launched` +Number of machines launched in total by Karpenter. Labeled by the owning provisioner. + +### `karpenter_machines_registered` +Number of machines registered in total by Karpenter. Labeled by the owning provisioner. 
+ +### `karpenter_machines_terminated` +Number of machines terminated in total by Karpenter. Labeled by reason the machine was terminated and the owning provisioner. + +## Provisioner Metrics + +### `karpenter_provisioner_limit` +The Provisioner Limits are the limits specified on the provisioner that restrict the quantity of resources provisioned. Labeled by provisioner name and resource type. + +### `karpenter_provisioner_scheduling_duration_seconds` +Duration of scheduling process in seconds. + +### `karpenter_provisioner_scheduling_simulation_duration_seconds` +Duration of scheduling simulations used for deprovisioning and provisioning in seconds. + +### `karpenter_provisioner_usage` +The Provisioner Usage is the amount of resources that have been provisioned by a particular provisioner. Labeled by provisioner name and resource type. + +### `karpenter_provisioner_usage_pct` +The Provisioner Usage Percentage is the percentage of each resource used based on the resources provisioned and the limits that have been configured in the range [0,100]. Labeled by provisioner name and resource type. + +## Nodes Metrics + +### `karpenter_nodes_allocatable` +Node allocatable are the resources allocatable by nodes. + +### `karpenter_nodes_created` +Number of nodes created in total by Karpenter. Labeled by owning provisioner. + +### `karpenter_nodes_leases_deleted` +Number of deleted leaked leases. + +### `karpenter_nodes_system_overhead` +Node system daemon overhead is the resources reserved for system overhead, i.e. the difference between the node's capacity and allocatable values as reported by the status. + +### `karpenter_nodes_terminated` +Number of nodes terminated in total by Karpenter. Labeled by owning provisioner. + +### `karpenter_nodes_termination_time_seconds` +The time taken between a node's deletion request and the removal of its finalizer. + +### `karpenter_nodes_total_daemon_limits` +Node total daemon limits are the resources specified by DaemonSet pod limits. 
+ +### `karpenter_nodes_total_daemon_requests` +Node total daemon requests are the resources requested by DaemonSet pods bound to nodes. + +### `karpenter_nodes_total_pod_limits` +Node total pod limits are the resources specified by non-DaemonSet pod limits. + +### `karpenter_nodes_total_pod_requests` +Node total pod requests are the resources requested by non-DaemonSet pods bound to nodes. + +## Pods Metrics + +### `karpenter_pods_startup_time_seconds` +The time from pod creation until the pod is running. + +### `karpenter_pods_state` +Pod state is the current state of pods. This metric can be used several ways as it is labeled by the pod name, namespace, owner, node, provisioner name, zone, architecture, capacity type, instance type and pod phase. + ## Cloudprovider Metrics +### `karpenter_cloudprovider_duration_seconds` +Duration of cloud provider method calls. Labeled by the controller, method name and provider. + +### `karpenter_cloudprovider_errors_total` +Total number of errors returned from CloudProvider calls. + ### `karpenter_cloudprovider_instance_type_cpu_cores` VCPUs cores for a given instance type. 
diff --git a/website/content/en/v0.31/upgrade-guide.md b/website/content/en/v0.31/upgrade-guide.md index c350743b6306..088277a5ee4b 100644 --- a/website/content/en/v0.31/upgrade-guide.md +++ b/website/content/en/v0.31/upgrade-guide.md @@ -15,9 +15,9 @@ To make upgrading easier we aim to minimize introduction of breaking changes wit [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go) -| KUBERNETES | 1.24 | 1.25 | 1.26 | 1.27 | 1.28 | -|------------|---------|---------|---------|---------|--------| -| karpenter | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.0 | +| KUBERNETES | 1.23 | 1.24 | 1.25 | 1.26 | 1.27 | 1.28 | +|------------|---------|---------|---------|---------|---------|--------| +| karpenter | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x | [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go) @@ -69,9 +69,9 @@ If you get the error `invalid ownership metadata; label validation error:` while In general, you can reapply the CRDs in the `crds` directory of the Karpenter helm chart: ```shell -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/karpenter.sh_provisioners.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/karpenter.sh_machines.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef >}}pkg/apis/crds/karpenter.sh_provisioners.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef >}}pkg/apis/crds/karpenter.sh_machines.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef >}}pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml ``` ### How Do We Break Incompatibility? 
@@ -232,7 +232,7 @@ kubectl delete mutatingwebhookconfigurations defaulting.webhook.karpenter.sh * The karpenter webhook and controller containers are combined into a single binary, which requires changes to the helm chart. If your Karpenter installation (helm or otherwise) currently customizes the karpenter webhook, your deployment tooling may require minor changes. * Karpenter now supports native interruption handling. If you were previously using Node Termination Handler for spot interruption handling and health events, you will need to remove the component from your cluster before enabling `aws.interruptionQueueName`. For more details on Karpenter's interruption handling, see the [Interruption Handling Docs]({{< ref "./concepts/deprovisioning/#interruption" >}}). For common questions on the migration process, see the [FAQ]({{< ref "./faq/#interruption-handling" >}}) * Instance category defaults are now explicitly persisted in the Provisioner, rather than handled implicitly in memory. By default, Provisioners will limit instance category to c,m,r. If any instance type constraints are applied, it will override this default. If you have created Provisioners in the past with unconstrained instance type, family, or category, Karpenter will now more flexibly use instance types than before. If you would like to apply these constraints, they must be included in the Provisioner CRD. -* Karpenter CRD raw YAML URLs have migrated from `https://raw.githubusercontent.com/aws/karpenter/v0.31.0/charts/karpenter/crds/...` to `https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/...`. If you reference static Karpenter CRDs or rely on `kubectl replace -f` to apply these CRDs from their remote location, you will need to migrate to the new location. 
+* Karpenter CRD raw YAML URLs have migrated from `https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef >}}charts/karpenter/crds/...` to `https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef >}}pkg/apis/crds/...`. If you reference static Karpenter CRDs or rely on `kubectl replace -f` to apply these CRDs from their remote location, you will need to migrate to the new location. * Pods without an ownerRef (also called "controllerless" or "naked" pods) will now be evicted by default during node termination and consolidation. Users can prevent controllerless pods from being voluntarily disrupted by applying the `karpenter.sh/do-not-evict: "true"` annotation to the pods in question. * The following CLI options/environment variables are now removed and replaced in favor of pulling settings dynamically from the [`karpenter-global-settings`]({{< ref "./concepts/settings#configmap" >}}) ConfigMap. See the [Settings docs]({{< ref "./concepts/settings/#environment-variables--cli-flags" >}}) for more details on configuring the new values in the ConfigMap.