From d88285b420fae4712b3388316bba0586a4320471 Mon Sep 17 00:00:00 2001 From: Bevan Arps Date: Tue, 12 Sep 2023 14:57:46 +1200 Subject: [PATCH 1/2] Treat CustomKubeletIdentityMissingPermissionError as retryable --- .../managed_cluster_extensions.go | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/v2/api/containerservice/customizations/managed_cluster_extensions.go b/v2/api/containerservice/customizations/managed_cluster_extensions.go index f99932fb98c..1483126280a 100644 --- a/v2/api/containerservice/customizations/managed_cluster_extensions.go +++ b/v2/api/containerservice/customizations/managed_cluster_extensions.go @@ -8,6 +8,7 @@ package customizations import ( "context" "fmt" + "github.com/Azure/azure-service-operator/v2/pkg/genruntime/core" "strings" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice" @@ -181,3 +182,43 @@ func clusterProvisioningStateBlocksReconciliation(provisioningState *string) boo return !nonBlockingManagedClusterProvisioningStates.Contains(strings.ToLower(*provisioningState)) } + +var _ extensions.ErrorClassifier = &ManagedClusterExtension{} + +// ClassifyError evaluates the provided error, returning details that include whether it is fatal or can be retried. +// cloudError is the error returned from ARM. +// apiVersion is the ARM API version used for the request. +// log is a logger that can be used for telemetry. +// next is the next implementation to call. 
+func (ext *ManagedClusterExtension) ClassifyError( + cloudError *genericarmclient.CloudError, + apiVersion string, + log logr.Logger, + next extensions.ErrorClassifierFunc, +) (core.CloudErrorDetails, error) { + details, err := next(cloudError) + if err != nil { + return core.CloudErrorDetails{}, err + } + + // Override is to treat Conflict as retryable for Redis, if the message contains "try again later" + if isRetryableClusterError(cloudError) { + details.Classification = core.ErrorRetryable + } + + return details, nil +} + +func isRetryableClusterError(err *genericarmclient.CloudError) bool { + if err == nil { + return false + } + + // A CustomKubeletIdentityMissingPermissionError can occur if the user-assigned identity required by the cluster + // hasn't yet been provisioned; we want to retry so that we finish provisioning the cluster once it is available. + if err.Code() == "CustomKubeletIdentityMissingPermissionError" { + return true + } + + return false +} From ad44fe40b332adf7e14e22385f34079649f93545 Mon Sep 17 00:00:00 2001 From: Bevan Arps Date: Thu, 14 Sep 2023 07:59:11 +1200 Subject: [PATCH 2/2] Fix issues identified during review --- .../customizations/managed_cluster_extensions.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/v2/api/containerservice/customizations/managed_cluster_extensions.go b/v2/api/containerservice/customizations/managed_cluster_extensions.go index 1483126280a..18d604ec048 100644 --- a/v2/api/containerservice/customizations/managed_cluster_extensions.go +++ b/v2/api/containerservice/customizations/managed_cluster_extensions.go @@ -8,7 +8,6 @@ package customizations import ( "context" "fmt" - "github.com/Azure/azure-service-operator/v2/pkg/genruntime/core" "strings" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice" @@ -25,6 +24,7 @@ import ( "github.com/Azure/azure-service-operator/v2/internal/resolver" "github.com/Azure/azure-service-operator/v2/internal/set" 
"github.com/Azure/azure-service-operator/v2/pkg/genruntime" + "github.com/Azure/azure-service-operator/v2/pkg/genruntime/core" "github.com/Azure/azure-service-operator/v2/pkg/genruntime/extensions" "github.com/Azure/azure-service-operator/v2/pkg/genruntime/secrets" ) @@ -201,7 +201,6 @@ func (ext *ManagedClusterExtension) ClassifyError( return core.CloudErrorDetails{}, err } - // Override is to treat Conflict as retryable for Redis, if the message contains "try again later" if isRetryableClusterError(cloudError) { details.Classification = core.ErrorRetryable }