From d332e642bb11bd27b457e4568d3221116e32a8a1 Mon Sep 17 00:00:00 2001 From: Soumik Majumder Date: Wed, 1 Sep 2021 14:36:58 +0530 Subject: [PATCH 1/2] Added retryable etcdserver errors to the retry list --- pkg/kapp/resources/resources.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/kapp/resources/resources.go b/pkg/kapp/resources/resources.go index 4e1b2f170..de5205c23 100644 --- a/pkg/kapp/resources/resources.go +++ b/pkg/kapp/resources/resources.go @@ -429,7 +429,7 @@ func (c *ResourcesImpl) isPodMetrics(resource Resource, err error) bool { } func (c *ResourcesImpl) isGeneralRetryableErr(err error) bool { - return IsResourceChangeBlockedErr(err) || c.isServerRescaleErr(err) || + return IsResourceChangeBlockedErr(err) || c.isServerRescaleErr(err) || c.isEtcdRetryableError(err) || c.isResourceQuotaConflict(err) || c.isInternalFailure(err) || errors.IsTooManyRequests(err) } @@ -453,6 +453,14 @@ func (c *ResourcesImpl) isServerRescaleErr(err error) bool { return false } +// Retries retryable errors thrown by etcd server. +// Comprehensive list of errors at : https://github.com/etcd-io/etcd/blob/main/server/etcdserver/errors.go +// Addresses : https://github.com/vmware-tanzu/carvel-kapp/issues/106 +func (c *ResourcesImpl) isEtcdRetryableError(err error) bool { + return strings.Contains(err.Error(), "etcdserver:") && + (strings.Contains(err.Error(), "timed out") || strings.Contains(err.Error(), "leader changed")) +} + // Handles case pointed out in : https://github.com/vmware-tanzu/carvel-kapp/issues/258. // An internal network error which might succeed on retrying. 
func (c *ResourcesImpl) isInternalFailure(err error) bool { From 5a4cbe506337a5e07de8710b0bc92ed6a5cb4e9d Mon Sep 17 00:00:00 2001 From: Soumik Majumder Date: Thu, 2 Sep 2021 14:24:36 +0530 Subject: [PATCH 2/2] Using regex to match retryable etcdserver errors --- pkg/kapp/resources/resources.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/kapp/resources/resources.go b/pkg/kapp/resources/resources.go index de5205c23..2fe99b802 100644 --- a/pkg/kapp/resources/resources.go +++ b/pkg/kapp/resources/resources.go @@ -453,14 +453,6 @@ func (c *ResourcesImpl) isServerRescaleErr(err error) bool { return false } -// Retries retryable errors thrown by etcd server. -// Comprehensive list of errors at : https://github.com/etcd-io/etcd/blob/main/server/etcdserver/errors.go -// Addresses : https://github.com/vmware-tanzu/carvel-kapp/issues/106 -func (c *ResourcesImpl) isEtcdRetryableError(err error) bool { - return strings.Contains(err.Error(), "etcdserver:") && - (strings.Contains(err.Error(), "timed out") || strings.Contains(err.Error(), "leader changed")) -} - // Handles case pointed out in : https://github.com/vmware-tanzu/carvel-kapp/issues/258. // An internal network error which might succeed on retrying. func (c *ResourcesImpl) isInternalFailure(err error) bool { @@ -523,6 +515,10 @@ var ( // Post https://cert-manager-webhook.cert-manager.svc:443/convert?timeout=30s: // x509: certificate signed by unknown authority (reason: ) conversionWebhookErrCheck = regexp.MustCompile("conversion webhook for (.+) failed:") + + // Matches retryable etcdserver errors + // Comprehensive list of errors at : https://github.com/etcd-io/etcd/blob/main/server/etcdserver/errors.go + etcdserverRetryableErrCheck = regexp.MustCompile("etcdserver:(.+)(leader changed|timed out)") ) func IsResourceChangeBlockedErr(err error) bool { @@ -540,6 +536,12 @@ func IsResourceChangeBlockedErr(err error) bool { } } +// isEtcdRetryableError reports whether err's message matches a retryable etcdserver error (leader changed or timed out).
+// Addresses : https://github.com/vmware-tanzu/carvel-kapp/issues/106 +func (c *ResourcesImpl) isEtcdRetryableError(err error) bool { + return etcdserverRetryableErrCheck.MatchString(err.Error()) +} + type AllOpts struct { ListOpts *metav1.ListOptions }