diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index df4b929e6fde..32312fb3c030 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -67,8 +67,11 @@ this document: * [How can I update CA dependencies (particularly k8s.io/kubernetes)?](#how-can-i-update-ca-dependencies-particularly-k8siokubernetes) * [In the context of Gardener](#in-the-context-of-gardener) - * [How do I sync gardener autoscaler with an upstream autoscaler minor release?](#how-do-i-sync-gardener-autoscaler-with-an-upstream-autoscaler-minor-release) - * [How do I revendor a different version of MCM in autoscaler?](#how-do-i-revendor-a-different-version-of-mcm-in-autoscaler) + * [For User](#for-user) + * [When does autoscaler back off early from a node group?](#when-does-autoscaler-back-off-early-from-a-node-group) + * [For Developer](#for-developer) + * [How do I sync gardener autoscaler with an upstream autoscaler minor release?](#how-do-i-sync-gardener-autoscaler-with-an-upstream-autoscaler-minor-release) + * [How do I revendor a different version of MCM in autoscaler?](#how-do-i-revendor-a-different-version-of-mcm-in-autoscaler) # Basics @@ -1087,6 +1090,51 @@ Caveats: # In the context of Gardener: +## For User +### When does autoscaler back off early from a node group? + +Autoscaler backs off from a node group if the requested scale-up doesn't succeed. Autoscaler decides to back off based on: +- Timeout + - if the node doesn't join in `max-node-provision-time` +- `ResourceExhausted` error + - if the node doesn't join due to an error from the cloud provider side, and the error is classified as `ResourceExhausted` +- Scale up operation fails for a node group + +As the name suggests, early back-off doesn't wait till `timeout` but backs off when a certain condition is satisfied. This helps in trying other node groups quickly. + +Currently early-backoff is enabled only for `ResourceExhausted` errors. 
Errors classified as `ResourceExhausted` are (but not limited to): +- `out of quota` errors where customer quota is exhausted, and the quota is configurable per zone (not per region). Generally, quotas for VMs, CPUs, GPUs and disks are configurable per zone, but please confirm the same for your cloud provider. +- `out of stock` errors where cloud-provider doesn't have enough resources in the particular zone, but the resource is available in other zones +- `not-supported` errors where the instance type or disk type is not supported in the particular zone. + +Errors not classified as `ResourceExhausted` are (but not limited to): +- `invalid credentials` +- `rate limiting` +- `policy constraints defined by customer` +- `service-unavailable` on cloud-provider side + +Backoff after `timeout` will happen for errors other than `ResourceExhausted`. + +*NOTE:* The identifier for the error might differ for each cloud-provider. The errors listed above are the general names used. + +**--Caveat during rolling update--** + +Case: + +- If node-grp `ng-A` is in rolling update, AND +- If the scale-up happens for `ng-A` due to an unschedulable pod `podA`, or a set of pods, AND +- If the node (say `node1`) couldn't join due to `ResourceExhausted` + +then autoscaler will back off early and try to remove the node, but the node removal won't succeed as currently CA is not allowed to perform any scale-down/delete node operation for a rolling update node-grp. + +In the above scenario, CA won't try to scale-up any other node-grp for `podA` as it still calculates `node1` to be a possible candidate to join (`ResourceExhausted` errors are recoverable errors). +Scale-up would still work for any new pods that can't fit on upcoming `node1` but can fit on some other node group. 
+ +The scale-up would stay blocked for such pod(s) for a maximum of `max-node-provision-time`, because after that the node won't be considered an upcoming node. + +Refer to issue https://github.com/gardener/autoscaler/issues/154 to track changes made for early-backoff enablement. + +## For Developer ### How do I sync gardener autoscaler with an upstream autoscaler minor release? This is helpful in order to offer Gardener CA with latest or recent K8s version. Note that this may also demand a need to upgrade K8s version used by Machine Controller Manager. diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go index 50b618929da4..040369c5bce2 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go @@ -24,13 +24,14 @@ package mcm import ( "context" "fmt" + "strings" + apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" - "strings" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" @@ -237,7 +238,7 @@ func ReferenceFromProviderID(m *McmManager, id string) (*Ref, error) { for _, machine := range machines { machineID := strings.Split(machine.Spec.ProviderID, "/") nodeID := strings.Split(id, "/") - // If registered, the ID will match the AWS instance ID. + // If registered, the ID will match the cloudprovider instance ID. // If unregistered, the ID will match the machine name. if machineID[len(machineID)-1] == nodeID[len(nodeID)-1] || nodeID[len(nodeID)-1] == machine.Name { @@ -371,7 +372,7 @@ func (machinedeployment *MachineDeployment) Belongs(node *apiv1.Node) (bool, err } // DeleteNodes deletes the nodes from the group. It is expected that this method will not be called -// for nodes not part of ANY machine deployment. 
+// for nodes which are not part of ANY machine deployment. func (machinedeployment *MachineDeployment) DeleteNodes(nodes []*apiv1.Node) error { size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) if err != nil { @@ -409,17 +410,11 @@ func (machinedeployment *MachineDeployment) Debug() string { // Nodes returns a list of all nodes that belong to this node group. func (machinedeployment *MachineDeployment) Nodes() ([]cloudprovider.Instance, error) { - nodeProviderIDs, err := machinedeployment.mcmManager.GetMachineDeploymentNodes(machinedeployment) + instances, err := machinedeployment.mcmManager.GetInstancesForMachineDeployment(machinedeployment) if err != nil { - return nil, fmt.Errorf("failed to get the nodes backed by the machinedeployment %q, error: %v", machinedeployment.Name, err) + return nil, fmt.Errorf("failed to get the cloudprovider.Instance for machines backed by the machinedeployment %q, error: %v", machinedeployment.Name, err) } - instances := make([]cloudprovider.Instance, len(nodeProviderIDs)) - for i := range nodeProviderIDs { - instances[i] = cloudprovider.Instance{ - Id: nodeProviderIDs[i], - } - } return instances, nil } diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go index 57431c64aee7..671278c3a9e6 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go @@ -20,14 +20,19 @@ import ( "context" "errors" "fmt" + "math" + "strings" + "testing" + + machinecodes "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + customfake "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mcm/fakeclient" + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - customfake "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mcm/fakeclient" - "math" - "testing" ) const ( @@ -375,7 +380,7 @@ func TestRefresh(t *testing.T) { nodeGroups: []string{nodeGroup2}, }, expect{ - machines: []*v1alpha1.Machine{newMachine("machine-1", "fakeID", nil, "machinedeployment-1", "machineset-1", "1", false)}, + machines: []*v1alpha1.Machine{newMachine("machine-1", "fakeID-1", nil, "machinedeployment-1", "machineset-1", "1", false, true)}, err: errors.Join(fmt.Errorf("could not reset priority annotation on machine machine-1, Error: %v", mcUpdateErrorMsg)), }, }, @@ -414,3 +419,125 @@ func TestRefresh(t *testing.T) { }) } } + +// Different kinds of cases possible and expected cloudprovider.Instance returned for them +// (mobj, mobjPid, nodeobj) -> instance(nodeobj.pid,_) +// (mobj, mobjPid, _) -> instance("requested://",_) +// (mobj, _,_) -> instance("requested://",_) +// (mobj, _,_) with quota error -> instance("requested://",status{'creating',{'outofResourcesClass','ResourceExhausted',''}}) +// (mobj, _,_) with invalid credentials error -> instance("requested://",_) + +// Example machine.status.lastOperation for a `ResourceExhausted` error +// +// lastOperation: { +// type: Creating +// state: Failed +// errorCode: ResourceExhausted +// description: "Cloud provider message - machine codes error: code = [ResourceExhausted] message = [Create machine "shoot--ddci--cbc-sys-tests03-pool-c32m256-3b-z1-575b9-hlvj6" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]]." 
+// } +// } +func TestNodes(t *testing.T) { + const ( + outOfQuotaMachineStatusErrorDescription = "Cloud provider message - machine codes error: code = [ResourceExhausted] message = [Create machine \"machine-with-vm-create-error-out-of-quota\" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]]" + invalidCredentialsMachineStatusErrorDescription = "Cloud provider message - machine codes error: code = [Internal] message = [user is not authorized to perform this action]" + ) + type expectationPerInstance struct { + providerID string + instanceState cloudprovider.InstanceState + instanceErrorClass cloudprovider.InstanceErrorClass + instanceErrorCode string + instanceErrorMessage string + } + type expect struct { + expectationPerInstanceList []expectationPerInstance + } + type data struct { + name string + setup setup + expect expect + } + table := []data{ + { + "Correct instances should be returned for machine objects under the machinedeployment", + setup{ + nodes: []*corev1.Node{newNode("node-1", "fakeID-1", false)}, + machines: func() []*v1alpha1.Machine { + allMachines := make([]*v1alpha1.Machine, 0, 5) + allMachines = append(allMachines, newMachine("machine-with-registered-node", "fakeID-1", nil, "machinedeployment-1", "", "", false, true)) + allMachines = append(allMachines, newMachine("machine-with-vm-but-no-node", "fakeID-2", nil, "machinedeployment-1", "", "", false, false)) + allMachines = append(allMachines, newMachine("machine-with-vm-creating", "", nil, "machinedeployment-1", "", "", false, false)) + allMachines = append(allMachines, newMachine("machine-with-vm-create-error-out-of-quota", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.ResourceExhausted.String(), Description: outOfQuotaMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false, false)) + 
allMachines = append(allMachines, newMachine("machine-with-vm-create-error-invalid-credentials", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.Internal.String(), Description: invalidCredentialsMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false, false)) + return allMachines + }(), + machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), + nodeGroups: []string{nodeGroup1}, + }, + expect{ + expectationPerInstanceList: []expectationPerInstance{ + {"fakeID-1", cloudprovider.InstanceState(-1), cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-but-no-node"), cloudprovider.InstanceState(-1), cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-creating"), cloudprovider.InstanceState(-1), cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-create-error-out-of-quota"), cloudprovider.InstanceCreating, cloudprovider.OutOfResourcesErrorClass, machinecodes.ResourceExhausted.String(), outOfQuotaMachineStatusErrorDescription}, + // invalid credentials error is mapped to Internal code as it can't be fixed by trying another zone + {placeholderInstanceIDForMachineObj("machine-with-vm-create-error-invalid-credentials"), cloudprovider.InstanceState(-1), cloudprovider.InstanceErrorClass(-1), "", ""}, + }, + }, + }, + } + + for _, entry := range table { + entry := entry // have a shallow copy of the entry for parallelization of tests + t.Run(entry.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + stop := make(chan struct{}) + defer close(stop) + controlMachineObjects, targetCoreObjects := setupEnv(&entry.setup) + m, trackers, hasSyncedCacheFns := createMcmManager(t, stop, testNamespace, nil, controlMachineObjects, targetCoreObjects) + defer trackers.Stop() + waitForCacheSync(t, 
stop, hasSyncedCacheFns) + + if entry.setup.targetCoreFakeResourceActions != nil { + trackers.TargetCore.SetFailAtFakeResourceActions(entry.setup.targetCoreFakeResourceActions) + } + if entry.setup.controlMachineFakeResourceActions != nil { + trackers.ControlMachine.SetFailAtFakeResourceActions(entry.setup.controlMachineFakeResourceActions) + } + + md, err := buildMachineDeploymentFromSpec(entry.setup.nodeGroups[0], m) + g.Expect(err).To(BeNil()) + + returnedInstances, err := md.Nodes() + g.Expect(err).To(BeNil()) + g.Expect(len(returnedInstances)).To(BeNumerically("==", len(entry.expect.expectationPerInstanceList))) + + for _, expectedInstance := range entry.expect.expectationPerInstanceList { + found := false + for _, gotInstance := range returnedInstances { + g.Expect(gotInstance.Id).ToNot(BeEmpty()) + if expectedInstance.providerID == gotInstance.Id { + if !strings.Contains(gotInstance.Id, "requested://") { + // must be a machine obj whose node is registered (ready or notReady) + g.Expect(gotInstance.Status).To(BeNil()) + } else { + if int(expectedInstance.instanceState) != -1 { + g.Expect(gotInstance.Status).ToNot(BeNil()) + g.Expect(gotInstance.Status.State).To(Equal(expectedInstance.instanceState)) + } + if int(expectedInstance.instanceErrorClass) != -1 || expectedInstance.instanceErrorCode != "" || expectedInstance.instanceErrorMessage != "" { + g.Expect(gotInstance.Status.ErrorInfo).ToNot(BeNil()) + g.Expect(gotInstance.Status.ErrorInfo.ErrorClass).To(Equal(expectedInstance.instanceErrorClass)) + g.Expect(gotInstance.Status.ErrorInfo.ErrorCode).To(Equal(expectedInstance.instanceErrorCode)) + g.Expect(gotInstance.Status.ErrorInfo.ErrorMessage).To(Equal(expectedInstance.instanceErrorMessage)) + } + } + found = true + break + } + } + g.Expect(found).To(BeTrue()) + } + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index 088099ca5667..09bf080e6ddb 100644 --- 
a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -27,7 +27,6 @@ import ( "errors" "flag" "fmt" - "k8s.io/autoscaler/cluster-autoscaler/utils/taints" "math/rand" "net/http" "os" @@ -41,6 +40,7 @@ import ( machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1" machineinformers "github.com/gardener/machine-controller-manager/pkg/client/informers/externalversions" machinelisters "github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alpha1" + machinecodes "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" kube_errors "k8s.io/apimachinery/pkg/api/errors" @@ -55,6 +55,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" "k8s.io/client-go/discovery" coreinformers "k8s.io/client-go/informers" corelisters "k8s.io/client-go/listers/core/v1" @@ -602,43 +603,70 @@ func (m *McmManager) retry(fn func(ctx context.Context) (bool, error), resourceT } } -// GetMachineDeploymentNodes returns the set of Nodes which belongs to the MachineDeployment. -func (m *McmManager) GetMachineDeploymentNodes(machinedeployment *MachineDeployment) ([]string, error) { - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machinedeployment.Name) - if err != nil { - return nil, fmt.Errorf("Unable to fetch MachineDeployment object %s, Error: %v", machinedeployment.Name, err) - } +// GetInstancesForMachineDeployment returns list of cloudprovider.Instance for machines which belongs to the MachineDeployment. 
+func (m *McmManager) GetInstancesForMachineDeployment(machinedeployment *MachineDeployment) ([]cloudprovider.Instance, error) { + var ( + list = []string{machinedeployment.Name} + selector = labels.NewSelector() + req, _ = labels.NewRequirement("name", selection.Equals, list) + ) - machineList, err := m.machineLister.Machines(m.namespace).List(labels.Everything()) + selector = selector.Add(*req) + machineList, err := m.machineLister.Machines(m.namespace).List(selector) if err != nil { - return nil, fmt.Errorf("Unable to fetch list of Machine objects %v", err) + return nil, fmt.Errorf("unable to fetch list of Machine objects %v for machinedeployment %q", err, machinedeployment.Name) } nodeList, err := m.nodeLister.List(labels.Everything()) if err != nil { - return nil, fmt.Errorf("Unable to fetch list of Nodes %v", err) + return nil, fmt.Errorf("unable to fetch list of Nodes %v", err) } - var nodes []string + instances := make([]cloudprovider.Instance, 0, len(machineList)) // Bearing O(n2) complexity, assuming we will not have lot of nodes/machines, open for optimisations. for _, machine := range machineList { - if strings.Contains(machine.Name, md.Name) { - var found bool - for _, node := range nodeList { - if machine.Labels["node"] == node.Name { - nodes = append(nodes, node.Spec.ProviderID) - found = true - break - } - } - if !found { - // No node found - either the machine has not registered yet or AWS is unable to fulfill the request. - // Report a special ID so that the autoscaler can track it as an unregistered node. 
- nodes = append(nodes, fmt.Sprintf("requested://%s", machine.Name)) - } + instance := findMatchingInstance(nodeList, machine) + instances = append(instances, instance) + } + return instances, nil +} + +func findMatchingInstance(nodes []*v1.Node, machine *v1alpha1.Machine) cloudprovider.Instance { + for _, node := range nodes { + if machine.Labels["node"] == node.Name { + return cloudprovider.Instance{Id: node.Spec.ProviderID} + } + } + // No k8s node found , one of the following cases possible + // - MCM is unable to fulfill the request to create VM. + // - VM is being created + // - the VM is up but has not registered yet + + // Report instance with a special placeholder ID so that the autoscaler can track it as an unregistered node. + // Report InstanceStatus only for `ResourceExhausted` errors + return cloudprovider.Instance{ + Id: placeholderInstanceIDForMachineObj(machine.Name), + Status: checkAndGetResourceExhaustedInstanceStatus(machine), + } +} + +func placeholderInstanceIDForMachineObj(name string) string { + return fmt.Sprintf("requested://%s", name) +} + +// checkAndGetResourceExhaustedInstanceStatus returns cloudprovider.InstanceStatus for the machine obj +func checkAndGetResourceExhaustedInstanceStatus(machine *v1alpha1.Machine) *cloudprovider.InstanceStatus { + if machine.Status.LastOperation.Type == v1alpha1.MachineOperationCreate && machine.Status.LastOperation.State == v1alpha1.MachineStateFailed && machine.Status.LastOperation.ErrorCode == machinecodes.ResourceExhausted.String() { + return &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceCreating, + ErrorInfo: &cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OutOfResourcesErrorClass, + ErrorCode: machinecodes.ResourceExhausted.String(), + ErrorMessage: machine.Status.LastOperation.Description, + }, } } - return nodes, nil + return nil } // validateNodeTemplate function validates the NodeTemplate object of the MachineClass diff --git 
a/cluster-autoscaler/cloudprovider/mcm/test_utils.go b/cluster-autoscaler/cloudprovider/mcm/test_utils.go index 05f300d0fc28..9e51dc8ef8c6 100644 --- a/cluster-autoscaler/cloudprovider/mcm/test_utils.go +++ b/cluster-autoscaler/cloudprovider/mcm/test_utils.go @@ -18,11 +18,12 @@ package mcm import ( "fmt" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" - "k8s.io/client-go/tools/cache" "testing" "time" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/client-go/tools/cache" + machineinternal "github.com/gardener/machine-controller-manager/pkg/apis/machine" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" faketyped "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake" @@ -102,14 +103,19 @@ func newMachineSets( func newMachine( name string, - providerIdGenerateName string, + providerId string, statusTemplate *v1alpha1.MachineStatus, mdName, msName string, priorityAnnotationValue string, - setDeletionTimeStamp bool, + setDeletionTimeStamp, + setNodeLabel bool, ) *v1alpha1.Machine { - m := newMachines(1, providerIdGenerateName, statusTemplate, mdName, msName, []string{priorityAnnotationValue}, []bool{setDeletionTimeStamp})[0] + m := newMachines(1, providerId, statusTemplate, mdName, msName, []string{priorityAnnotationValue}, []bool{setDeletionTimeStamp})[0] m.Name = name + m.Spec.ProviderID = providerId + if !setNodeLabel { + delete(m.Labels, "node") + } return m } @@ -141,7 +147,10 @@ func newMachines( Annotations: map[string]string{priorityAnnotationKey: priorityAnnotationValues[i]}, CreationTimestamp: metav1.Now(), }, - Spec: v1alpha1.MachineSpec{ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1)}, + } + + if providerIdGenerateName != "" { + m.Spec = v1alpha1.MachineSpec{ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1)} } m.Labels["node"] = fmt.Sprintf("node-%d", i+1) diff --git a/cluster-autoscaler/core/static_autoscaler.go 
b/cluster-autoscaler/core/static_autoscaler.go index aee83b82a64e..cfe62f9b0ca1 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -434,7 +434,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr // Check if there has been a constant difference between the number of nodes in k8s and // the number of nodes on the cloud provider side. // TODO: andrewskim - add protection for ready AWS nodes. - // NOTE: Commented this code as it removes `Registered but long not Ready` nodes which causes issues like scaling below minimum size and removing ready nodes during meltdown scenario + + // FORK-CHANGE: Commented this code as it removes `Registered but long not Ready` nodes which causes issues like scaling below minimum size and removing ready nodes during meltdown scenario //fixedSomething, err := fixNodeGroupSize(autoscalingContext, a.clusterStateRegistry, currentTime) //if err != nil { // klog.Errorf("Failed to fix node group sizes: %v", err) @@ -621,7 +622,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) || a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) - klog.V(4).Infof("Scale down status: lastScaleUpTime=%s lastScaleDownDeleteTime=%v "+ + // FORK-CHANGE: Updated log V(4) -> V(2). 
This helps in debugging + klog.V(2).Infof("Scale down status: lastScaleUpTime=%s lastScaleDownDeleteTime=%v "+ "lastScaleDownFailTime=%s scaleDownForbidden=%v scaleDownInCooldown=%v", a.lastScaleUpTime, a.lastScaleDownDeleteTime, a.lastScaleDownFailTime, a.processorCallbacks.disableScaleDownForLoop, scaleDownInCooldown) diff --git a/cluster-autoscaler/go.mod b/cluster-autoscaler/go.mod index 4520048bb087..14cee1be5724 100644 --- a/cluster-autoscaler/go.mod +++ b/cluster-autoscaler/go.mod @@ -13,7 +13,7 @@ require ( github.com/Azure/skewer v0.0.14 github.com/aws/aws-sdk-go v1.44.241 github.com/digitalocean/godo v1.27.0 - github.com/gardener/machine-controller-manager v0.49.0 + github.com/gardener/machine-controller-manager v0.50.0 github.com/gardener/machine-controller-manager-provider-aws v0.17.0 github.com/gardener/machine-controller-manager-provider-azure v0.10.0 github.com/ghodss/yaml v1.0.0 @@ -41,13 +41,13 @@ require ( google.golang.org/protobuf v1.30.0 gopkg.in/gcfg.v1 v1.2.3 gopkg.in/yaml.v2 v2.4.0 - k8s.io/api v0.27.1 - k8s.io/apimachinery v0.27.1 - k8s.io/apiserver v0.27.1 - k8s.io/client-go v0.27.1 + k8s.io/api v0.27.2 + k8s.io/apimachinery v0.27.2 + k8s.io/apiserver v0.27.2 + k8s.io/client-go v0.27.2 k8s.io/cloud-provider v0.27.1 k8s.io/cloud-provider-aws v1.27.1 - k8s.io/component-base v0.27.1 + k8s.io/component-base v0.27.2 k8s.io/component-helpers v0.27.1 k8s.io/klog/v2 v2.90.1 k8s.io/kubelet v0.27.1 @@ -186,7 +186,7 @@ require ( k8s.io/csi-translation-lib v0.27.0 // indirect k8s.io/dynamic-resource-allocation v0.0.0 // indirect k8s.io/kms v0.27.1 // indirect - k8s.io/kube-openapi v0.0.0-20230327201221-f5883ff37f0c // indirect + k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect k8s.io/kube-proxy v0.0.0 // indirect k8s.io/kube-scheduler v0.0.0 // indirect k8s.io/kubectl v0.0.0 // indirect diff --git a/cluster-autoscaler/go.sum b/cluster-autoscaler/go.sum index a21f5d40c2e5..9cd6c7e07fff 100644 --- a/cluster-autoscaler/go.sum +++ 
b/cluster-autoscaler/go.sum @@ -240,8 +240,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -github.com/gardener/machine-controller-manager v0.49.0 h1:ZUQPhwVnLrgftFnM85035kDJJdWxIbL3iXHP6VgimJs= -github.com/gardener/machine-controller-manager v0.49.0/go.mod h1:ypFf18+Y6mJpohfZrOJI+vq7NQ3Yn7Ke4ujW23u/6nM= +github.com/gardener/machine-controller-manager v0.50.0 h1:3dcQjzueFU1TGgprV00adjb3OCR99myTBx8DQGxywks= +github.com/gardener/machine-controller-manager v0.50.0/go.mod h1:RySZ40AgbNV/wMq60G/w49kb+okbj5Xs1A6usz5Pm/I= github.com/gardener/machine-controller-manager-provider-aws v0.17.0 h1:I4ML6yUOy4aHJ83Gstsryt1D7oRAGiSSR9MNihEUeAk= github.com/gardener/machine-controller-manager-provider-aws v0.17.0/go.mod h1:GjkJKfEVKoMQmJJVpzRgqftzDitwBt61PWbBH0Vx940= github.com/gardener/machine-controller-manager-provider-azure v0.10.0 h1:P5/SIMAuMwb8EwmfL+r0dyKIsnKE9TYgzWHSAyZwhtw= @@ -1221,8 +1221,8 @@ k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= k8s.io/klog/v2 v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/kms v0.27.1 h1:JTSQbJb+mcobScQwF0bOmZhIwP17k8GvBsiLlA6SQqw= k8s.io/kms v0.27.1/go.mod h1:VuTsw0uHlSycKLCkypCGxfFCjLfzf/5YMeATECd/zJA= -k8s.io/kube-openapi v0.0.0-20230327201221-f5883ff37f0c h1:EFfsozyzZ/pggw5qNx7ftTVZdp7WZl+3ih89GEjYEK8= -k8s.io/kube-openapi v0.0.0-20230327201221-f5883ff37f0c/go.mod h1:byini6yhqGC14c3ebc/QwanvYwhuMWF6yz2F8uwW8eg= +k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f h1:2kWPakN3i/k81b0gvD5C5FJ2kxm1WrQFanWchyKuqGg= +k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f/go.mod h1:byini6yhqGC14c3ebc/QwanvYwhuMWF6yz2F8uwW8eg= k8s.io/kube-proxy v0.27.1 h1:awlTLXvZhM/A4Nsu0ma34uKR4pHxigj9vhuQ9BHfwUk= 
k8s.io/kube-proxy v0.27.1/go.mod h1:6hJ7Fnt3QtD+5cpGN6MgZOOO9KbD6TvF0/BPHk+lYtQ= k8s.io/kube-scheduler v0.27.1 h1:Tq7ff+jUZaK8fejL4uOy1CC2B+bz2acKQ7Bf7fCtnhs= diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/types.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/types.go index bfe211cc8627..ab83b4226d67 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/types.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/types.go @@ -87,11 +87,11 @@ type NodeTemplateSpec struct { // MachineTemplateSpec describes the data a machine should have when created from a template type MachineTemplateSpec struct { // Standard object's metadata. - // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata metav1.ObjectMeta // Specification of the desired behavior of the machine. - // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status Spec MachineSpec } @@ -121,11 +121,11 @@ type MachineTemplate struct { metav1.TypeMeta // Standard object's metadata. - // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata metav1.ObjectMeta // Template defines the machines that will be created from this machine template. 
- // https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status + // https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status Template MachineTemplateSpec } @@ -136,7 +136,7 @@ type MachineTemplateList struct { metav1.TypeMeta // Standard list metadata. - // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds metav1.ListMeta // List of machine templates @@ -187,6 +187,10 @@ type LastOperation struct { // Description of the current operation Description string + // ErrorCode of the current operation if any + // +optional + ErrorCode string + // Last update time of current operation LastUpdateTime metav1.Time diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/machine_types.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/machine_types.go index 1051f31de0bc..b5920e570a6a 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/machine_types.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/machine_types.go @@ -122,6 +122,10 @@ type LastOperation struct { // Description of the current operation Description string `json:"description,omitempty"` + // ErrorCode of the current operation if any + // +optional + ErrorCode string `json:"errorCode,omitempty"` + // Last update time of current operation LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` @@ -188,7 +192,7 @@ const ( // MachineOperationHealthCheck indicates that the operation was a create MachineOperationHealthCheck MachineOperationType = "HealthCheck" - // MachineOperationDelete indicates that the operation was a create + // MachineOperationDelete indicates that the 
operation was a delete MachineOperationDelete MachineOperationType = "Delete" ) diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/shared_types.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/shared_types.go index c5737c45ba34..b5473561d491 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/shared_types.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/shared_types.go @@ -26,12 +26,12 @@ import ( type MachineTemplateSpec struct { // +kubebuilder:validation:XPreserveUnknownFields // Standard object's metadata. - // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata // +optional metav1.ObjectMeta `json:"metadata,omitempty"` // Specification of the desired behavior of the machine. 
- // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status // +optional Spec MachineSpec `json:"spec,omitempty"` } diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/zz_generated.conversion.go index 5b027828dae6..d912fa928f9d 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -362,6 +362,7 @@ func Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.Current func autoConvert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, out *machine.LastOperation, s conversion.Scope) error { out.Description = in.Description + out.ErrorCode = in.ErrorCode out.LastUpdateTime = in.LastUpdateTime out.State = machine.MachineState(in.State) out.Type = machine.MachineOperationType(in.Type) @@ -375,6 +376,7 @@ func Convert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, func autoConvert_machine_LastOperation_To_v1alpha1_LastOperation(in *machine.LastOperation, out *LastOperation, s conversion.Scope) error { out.Description = in.Description + out.ErrorCode = in.ErrorCode out.LastUpdateTime = in.LastUpdateTime out.State = MachineState(in.State) out.Type = MachineOperationType(in.Type) diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/zz_generated.defaults.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/zz_generated.defaults.go index 3fa68441f148..d5513ac6d091 100644 --- 
a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/zz_generated.defaults.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/zz_generated.defaults.go @@ -1,3 +1,17 @@ +// Copyright 2023 SAP SE or an SAP affiliate company +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // +build !ignore_autogenerated // This file was autogenerated by defaulter-gen. Do not edit it manually! diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machine.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machine.go index 4c6100ca49db..fc28e64fec05 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machine.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machine.go @@ -24,7 +24,6 @@ import ( v1alpha1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" types "k8s.io/apimachinery/pkg/types" watch "k8s.io/apimachinery/pkg/watch" testing "k8s.io/client-go/testing" @@ 
-36,9 +35,9 @@ type FakeMachines struct { ns string } -var machinesResource = schema.GroupVersionResource{Group: "machine.sapcloud.io", Version: "v1alpha1", Resource: "machines"} +var machinesResource = v1alpha1.SchemeGroupVersion.WithResource("machines") -var machinesKind = schema.GroupVersionKind{Group: "machine.sapcloud.io", Version: "v1alpha1", Kind: "Machine"} +var machinesKind = v1alpha1.SchemeGroupVersion.WithKind("Machine") // Get takes name of the machine, and returns the corresponding machine object, and an error if there is any. func (c *FakeMachines) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Machine, err error) { diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineclass.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineclass.go index e75166de4e50..c3eec42a82c2 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineclass.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineclass.go @@ -24,7 +24,6 @@ import ( v1alpha1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" types "k8s.io/apimachinery/pkg/types" watch "k8s.io/apimachinery/pkg/watch" testing "k8s.io/client-go/testing" @@ -36,9 +35,9 @@ type FakeMachineClasses struct { ns string } -var machineclassesResource = schema.GroupVersionResource{Group: "machine.sapcloud.io", Version: "v1alpha1", Resource: "machineclasses"} +var machineclassesResource = v1alpha1.SchemeGroupVersion.WithResource("machineclasses") -var 
machineclassesKind = schema.GroupVersionKind{Group: "machine.sapcloud.io", Version: "v1alpha1", Kind: "MachineClass"} +var machineclassesKind = v1alpha1.SchemeGroupVersion.WithKind("MachineClass") // Get takes name of the machineClass, and returns the corresponding machineClass object, and an error if there is any. func (c *FakeMachineClasses) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.MachineClass, err error) { diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machinedeployment.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machinedeployment.go index e2eceb3b081c..af9f5f96ef9c 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machinedeployment.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machinedeployment.go @@ -25,7 +25,6 @@ import ( autoscalingv1 "k8s.io/api/autoscaling/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" types "k8s.io/apimachinery/pkg/types" watch "k8s.io/apimachinery/pkg/watch" testing "k8s.io/client-go/testing" @@ -37,9 +36,9 @@ type FakeMachineDeployments struct { ns string } -var machinedeploymentsResource = schema.GroupVersionResource{Group: "machine.sapcloud.io", Version: "v1alpha1", Resource: "machinedeployments"} +var machinedeploymentsResource = v1alpha1.SchemeGroupVersion.WithResource("machinedeployments") -var machinedeploymentsKind = schema.GroupVersionKind{Group: "machine.sapcloud.io", Version: "v1alpha1", Kind: "MachineDeployment"} +var machinedeploymentsKind = v1alpha1.SchemeGroupVersion.WithKind("MachineDeployment") // Get 
takes name of the machineDeployment, and returns the corresponding machineDeployment object, and an error if there is any. func (c *FakeMachineDeployments) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.MachineDeployment, err error) { diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineset.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineset.go index 6c4ac2b9c235..9e54ae7bb4fa 100644 --- a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineset.go +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake/fake_machineset.go @@ -25,7 +25,6 @@ import ( autoscalingv1 "k8s.io/api/autoscaling/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" types "k8s.io/apimachinery/pkg/types" watch "k8s.io/apimachinery/pkg/watch" testing "k8s.io/client-go/testing" @@ -37,9 +36,9 @@ type FakeMachineSets struct { ns string } -var machinesetsResource = schema.GroupVersionResource{Group: "machine.sapcloud.io", Version: "v1alpha1", Resource: "machinesets"} +var machinesetsResource = v1alpha1.SchemeGroupVersion.WithResource("machinesets") -var machinesetsKind = schema.GroupVersionKind{Group: "machine.sapcloud.io", Version: "v1alpha1", Kind: "MachineSet"} +var machinesetsKind = v1alpha1.SchemeGroupVersion.WithKind("MachineSet") // Get takes name of the machineSet, and returns the corresponding machineSet object, and an error if there is any. 
func (c *FakeMachineSets) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.MachineSet, err error) { diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/code_string.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/code_string.go new file mode 100644 index 000000000000..3feb6cea9404 --- /dev/null +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/code_string.go @@ -0,0 +1,68 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file was copied and modified from the github.com/grpc/grpc-go project + * https://github.com/grpc/grpc-go/blob/v1.29.x/codes/code_string.go + * + * Modifications Copyright (c) 2020 SAP SE or an SAP affiliate company. All rights reserved.
+ * + */ + +// Package codes defines the canonical error codes used by machineAPIs for gardener/machine-controller-manager +package codes + +import "strconv" + +func (c Code) String() string { + switch c { + case OK: + return "OK" + case Canceled: + return "Canceled" + case Unknown: + return "Unknown" + case InvalidArgument: + return "InvalidArgument" + case DeadlineExceeded: + return "DeadlineExceeded" + case NotFound: + return "NotFound" + case AlreadyExists: + return "AlreadyExists" + case PermissionDenied: + return "PermissionDenied" + case ResourceExhausted: + return "ResourceExhausted" + case FailedPrecondition: + return "FailedPrecondition" + case Aborted: + return "Aborted" + case OutOfRange: + return "OutOfRange" + case Unimplemented: + return "Unimplemented" + case Internal: + return "Internal" + case Unavailable: + return "Unavailable" + case DataLoss: + return "DataLoss" + case Unauthenticated: + return "Unauthenticated" + default: + return "Code(" + strconv.FormatInt(int64(c), 10) + ")" + } +} diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/codes.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/codes.go new file mode 100644 index 000000000000..ad9e979d1ae5 --- /dev/null +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes/codes.go @@ -0,0 +1,175 @@ +/* + * + * Copyright 2014 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file was copied and modified from the github.com/grpc/grpc-go project + * https://github.com/grpc/grpc-go/blob/v1.29.x/codes/codes.go + * + * Modifications Copyright (c) 2020 SAP SE or an SAP affiliate company. All rights reserved. + * + */ + +// Package codes defines the canonical error codes used by machineAPIs for gardener/machine-controller-manager +package codes + +// Code is an unsigned int representing a canonical error code for machineAPIs +type Code uint32 + +const ( + // OK is returned on success. + OK Code = 0 + + // Canceled indicates the operation was canceled (typically by the caller). + Canceled Code = 1 + + // Unknown error. An example of where this error may be returned is + // if a Status value received from another address space belongs to + // an error-space that is not known in this address space. Also + // errors raised by APIs that do not return enough error information + // may be converted to this error. + Unknown Code = 2 + + // InvalidArgument indicates client specified an invalid argument. + // Note that this differs from FailedPrecondition. It indicates arguments + // that are problematic regardless of the state of the system + // (e.g., a malformed file name). + InvalidArgument Code = 3 + + // DeadlineExceeded means operation expired before completion. + // For operations that change the state of the system, this error may be + // returned even if the operation has completed successfully. For + // example, a successful response from a server could have been delayed + // long enough for the deadline to expire. + DeadlineExceeded Code = 4 + + // NotFound means some requested entity (e.g., file or directory) was + // not found. + NotFound Code = 5 + + // AlreadyExists means an attempt to create an entity failed because one + // already exists.
+ AlreadyExists Code = 6 + + // PermissionDenied indicates the caller does not have permission to + // execute the specified operation. It must not be used for rejections + // caused by exhausting some resource (use ResourceExhausted + // instead for those errors). It must not be + // used if the caller cannot be identified (use Unauthenticated + // instead for those errors). + PermissionDenied Code = 7 + + // ResourceExhausted indicates some resource has been exhausted, perhaps + // a per-user quota, or perhaps the entire file system is out of space. + ResourceExhausted Code = 8 + + // FailedPrecondition indicates operation was rejected because the + // system is not in a state required for the operation's execution. + // For example, directory to be deleted may be non-empty, an rmdir + // operation is applied to a non-directory, etc. + // + // A litmus test that may help a service implementor in deciding + // between FailedPrecondition, Aborted, and Unavailable: + // (a) Use Unavailable if the client can retry just the failing call. + // (b) Use Aborted if the client should retry at a higher-level + // (e.g., restarting a read-modify-write sequence). + // (c) Use FailedPrecondition if the client should not retry until + // the system state has been explicitly fixed. E.g., if an "rmdir" + // fails because the directory is non-empty, FailedPrecondition + // should be returned since the client should not retry unless + // they have first fixed up the directory by deleting files from it. + // (d) Use FailedPrecondition if the client performs conditional + // REST Get/Update/Delete on a resource and the resource on the + // server does not match the condition. E.g., conflicting + // read-modify-write on the same resource. + FailedPrecondition Code = 9 + + // Aborted indicates the operation was aborted, typically due to a + // concurrency issue like sequencer check failures, transaction aborts, + // etc. 
+ // + // See litmus test above for deciding between FailedPrecondition, + // Aborted, and Unavailable. + Aborted Code = 10 + + // OutOfRange means operation was attempted past the valid range. + // E.g., seeking or reading past end of file. + // + // Unlike InvalidArgument, this error indicates a problem that may + // be fixed if the system state changes. For example, a 32-bit file + // system will generate InvalidArgument if asked to read at an + // offset that is not in the range [0,2^32-1], but it will generate + // OutOfRange if asked to read from an offset past the current + // file size. + // + // There is a fair bit of overlap between FailedPrecondition and + // OutOfRange. We recommend using OutOfRange (the more specific + // error) when it applies so that callers who are iterating through + // a space can easily look for an OutOfRange error to detect when + // they are done. + OutOfRange Code = 11 + + // Unimplemented indicates operation is not implemented or not + // supported/enabled in this service. + Unimplemented Code = 12 + + // Internal errors. Means some invariants expected by underlying + // system have been broken. If you see one of these errors, + // something is very broken. + Internal Code = 13 + + // Unavailable indicates the service is currently unavailable. + // This is most likely a transient condition and may be corrected + // by retrying with a backoff. Note that it is not always safe to retry + // non-idempotent operations. + // + // See litmus test above for deciding between FailedPrecondition, + // Aborted, and Unavailable. + Unavailable Code = 14 + + // DataLoss indicates unrecoverable data loss or corruption. + DataLoss Code = 15 + + // Unauthenticated indicates the request does not have valid + // authentication credentials for the operation.
+ Unauthenticated Code = 16 +) + +var strToCode = map[string]Code{ + "OK": OK, + "Canceled": Canceled, + "Unknown": Unknown, + "InvalidArgument": InvalidArgument, + "DeadlineExceeded": DeadlineExceeded, + "NotFound": NotFound, + "AlreadyExists": AlreadyExists, + "PermissionDenied": PermissionDenied, + "ResourceExhausted": ResourceExhausted, + "FailedPrecondition": FailedPrecondition, + "Aborted": Aborted, + "OutOfRange": OutOfRange, + "Unimplemented": Unimplemented, + "Internal": Internal, + "Unavailable": Unavailable, + "DataLoss": DataLoss, + "Unauthenticated": Unauthenticated, +} + +// StringToCode converts a string into its Code; unrecognized strings map to Unknown. +func StringToCode(input string) Code { + if code, ok := strToCode[input]; ok { + return code + } + return Unknown +} diff --git a/cluster-autoscaler/vendor/k8s.io/kube-openapi/pkg/handler/handler.go b/cluster-autoscaler/vendor/k8s.io/kube-openapi/pkg/handler/handler.go index 84e902646537..37cb96f1be11 100644 --- a/cluster-autoscaler/vendor/k8s.io/kube-openapi/pkg/handler/handler.go +++ b/cluster-autoscaler/vendor/k8s.io/kube-openapi/pkg/handler/handler.go @@ -22,6 +22,7 @@ import ( "fmt" "net/http" "strconv" + "sync" "time" "github.com/NYTimes/gziphandler" @@ -98,16 +99,6 @@ func NewOpenAPIServiceLazy(swagger cached.Data[*spec.Swagger]) *OpenAPIService { return o } -func (o *OpenAPIService) getSwaggerBytes() (timedSpec, string, error) { - result := o.jsonCache.Get() - return result.Data, result.Etag, result.Err -} - -func (o *OpenAPIService) getSwaggerPbBytes() (timedSpec, string, error) { - result := o.protoCache.Get() - return result.Data, result.Etag, result.Err -} - func (o *OpenAPIService) UpdateSpec(swagger *spec.Swagger) error { o.UpdateSpecLazy(cached.NewResultOK(swagger, uuid.New().String())) return nil @@ -135,6 +126,9 @@ func RegisterOpenAPIVersionedService(spec *spec.Swagger, servePath string, handl // RegisterOpenAPIVersionedService registers a handler to provide access to provided swagger spec.
func (o *OpenAPIService) RegisterOpenAPIVersionedService(servePath string, handler common.PathHandler) error { + // Mutex protects the cache chain + var mutex sync.Mutex + accepted := []struct { Type string SubType string @@ -163,7 +157,9 @@ func (o *OpenAPIService) RegisterOpenAPIVersionedService(servePath string, handl continue } // serve the first matching media type in the sorted clause list + mutex.Lock() result := accepts.GetDataAndEtag.Get() + mutex.Unlock() if result.Err != nil { klog.Errorf("Error in OpenAPI handler: %s", result.Err) // only return a 503 if we have no older cache data to serve diff --git a/cluster-autoscaler/vendor/modules.txt b/cluster-autoscaler/vendor/modules.txt index fba6d15faba3..4b0dac7aa486 100644 --- a/cluster-autoscaler/vendor/modules.txt +++ b/cluster-autoscaler/vendor/modules.txt @@ -239,8 +239,8 @@ github.com/felixge/httpsnoop # github.com/fsnotify/fsnotify v1.6.0 ## explicit; go 1.16 github.com/fsnotify/fsnotify -# github.com/gardener/machine-controller-manager v0.49.0 -## explicit; go 1.19 +# github.com/gardener/machine-controller-manager v0.50.0 +## explicit; go 1.20 github.com/gardener/machine-controller-manager/pkg/apis/machine github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1 github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned @@ -254,6 +254,7 @@ github.com/gardener/machine-controller-manager/pkg/client/informers/externalvers github.com/gardener/machine-controller-manager/pkg/client/informers/externalversions/machine/v1alpha1 github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alpha1 github.com/gardener/machine-controller-manager/pkg/util/provider/cache +github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes # github.com/gardener/machine-controller-manager-provider-aws v0.17.0 ## explicit; go 1.19 github.com/gardener/machine-controller-manager-provider-aws/pkg/aws/apis @@ -1044,7 +1045,7 @@ gopkg.in/yaml.v2 # 
gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 -# k8s.io/api v0.27.1 => k8s.io/api v0.27.1 +# k8s.io/api v0.27.2 => k8s.io/api v0.27.1 ## explicit; go 1.20 k8s.io/api/admission/v1 k8s.io/api/admission/v1beta1 @@ -1100,7 +1101,7 @@ k8s.io/api/scheduling/v1beta1 k8s.io/api/storage/v1 k8s.io/api/storage/v1alpha1 k8s.io/api/storage/v1beta1 -# k8s.io/apimachinery v0.27.1 => k8s.io/apimachinery v0.28.0-alpha.0 +# k8s.io/apimachinery v0.27.2 => k8s.io/apimachinery v0.28.0-alpha.0 ## explicit; go 1.20 k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors @@ -1161,7 +1162,7 @@ k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/netutil k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v0.27.1 => k8s.io/apiserver v0.27.1 +# k8s.io/apiserver v0.27.2 => k8s.io/apiserver v0.27.1 ## explicit; go 1.20 k8s.io/apiserver/pkg/admission k8s.io/apiserver/pkg/admission/cel @@ -1303,7 +1304,7 @@ k8s.io/apiserver/plugin/pkg/audit/truncate k8s.io/apiserver/plugin/pkg/audit/webhook k8s.io/apiserver/plugin/pkg/authenticator/token/webhook k8s.io/apiserver/plugin/pkg/authorizer/webhook -# k8s.io/client-go v0.27.1 => k8s.io/client-go v0.27.1 +# k8s.io/client-go v0.27.2 => k8s.io/client-go v0.27.1 ## explicit; go 1.20 k8s.io/client-go/applyconfigurations/admissionregistration/v1 k8s.io/client-go/applyconfigurations/admissionregistration/v1alpha1 @@ -1651,7 +1652,7 @@ k8s.io/cloud-provider/volume/helpers # k8s.io/cloud-provider-aws v1.27.1 ## explicit; go 1.20 k8s.io/cloud-provider-aws/pkg/providers/v1 -# k8s.io/component-base v0.27.1 => k8s.io/component-base v0.27.1 +# k8s.io/component-base v0.27.2 => k8s.io/component-base v0.27.1 ## explicit; go 1.20 k8s.io/component-base/cli/flag k8s.io/component-base/codec @@ -1727,7 +1728,7 @@ k8s.io/kms/apis/v1beta1 k8s.io/kms/apis/v2 k8s.io/kms/pkg/service k8s.io/kms/pkg/util -# k8s.io/kube-openapi v0.0.0-20230327201221-f5883ff37f0c +# 
k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f ## explicit; go 1.19 k8s.io/kube-openapi/pkg/builder k8s.io/kube-openapi/pkg/builder3