From 6280ca4f23e039475fba09922c0cd6d38aab2b6a Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Fri, 2 Sep 2022 14:37:20 +0200 Subject: [PATCH 1/5] Add retry loop for client.get of replicaset as that sometimes fails Signed-off-by: Kevin Earls --- pkg/instrumentation/sdk.go | 45 ++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 1c0c31fc5a..95c220c4af 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -19,6 +19,7 @@ import ( "fmt" "sort" "strings" + "time" "unsafe" "github.com/go-logr/logr" @@ -28,6 +29,8 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" @@ -244,7 +247,7 @@ func chooseServiceName(pod corev1.Pod, resources map[string]string, index int) s // User defined attributes (in explicitly set env var) have higher precedence. func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.Instrumentation, ns corev1.Namespace, pod corev1.Pod, index int) map[string]string { // get existing resources env var and parse it into a map - existingRes := map[string]bool{} + existingResourceMap := map[string]bool{} existingResourceEnvIdx := getIndexOfEnv(pod.Spec.Containers[index].Env, constants.EnvOTELResourceAttrs) if existingResourceEnvIdx > -1 { existingResArr := strings.Split(pod.Spec.Containers[index].Env[existingResourceEnvIdx].Value, ",") @@ -253,14 +256,14 @@ func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.I if len(keyValueArr) != 2 { continue } - existingRes[keyValueArr[0]] = true + existingResourceMap[keyValueArr[0]] = true } } - res := map[string]string{} + resourceMap := map[string]string{} for k, v := range otelinst.Spec.Resource.Attributes { - if !existingRes[k] { - res[k] = v + if !existingResourceMap[k] { + resourceMap[k] = v } } @@ -274,11 +277,11 @@ func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.I k8sResources[semconv.K8SNodeNameKey] = pod.Spec.NodeName i.addParentResourceLabels(ctx, otelinst.Spec.Resource.AddK8sUIDAttributes, ns, pod.ObjectMeta, k8sResources) for k, v := range k8sResources { - if !existingRes[string(k)] && v != "" { - res[string(k)] = v + if !existingResourceMap[string(k)] && v != "" { + resourceMap[string(k)] = v } } - return res + return resourceMap } func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns corev1.Namespace, objectMeta metav1.ObjectMeta, resources map[attribute.Key]string) { @@ -291,12 +294,26 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns } // parent of ReplicaSet is e.g. Deployment which we are interested to know rs := appsv1.ReplicaSet{} - // ignore the error. The object might not exist, the error is not important, getting labels is just the best effort - //nolint:errcheck - i.client.Get(ctx, types.NamespacedName{ - Namespace: ns.Name, - Name: owner.Name, - }, &rs) + nsn := types.NamespacedName{Namespace: ns.Name, Name: owner.Name} + backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 30 * time.Second} // TODO decide which of these we need and what they should be set to + + checkError := func(err error) bool { + // if the error looks like 'ReplicaSet.apps "my-deployment-with-sidecar-f46b479f" not found' ignore it + if strings.HasPrefix(err.Error(), "ReplicaSet.apps") && strings.HasSuffix(err.Error(), "not found") { + return true + } + return false + } + + getReplicaSet := func() error { + return i.client.Get(ctx, nsn, &rs) + } + + // use a retry loop to get the Deployment. A single call to client.get fails occasionally + err := retry.OnError(backOff, checkError, getReplicaSet) + if err != nil { + i.logger.Error(err, "failed to get replicaset", "replicaset", nsn.Name, "namespace", nsn.Namespace) + } i.addParentResourceLabels(ctx, uid, ns, rs.ObjectMeta, resources) case "deployment": resources[semconv.K8SDeploymentNameKey] = owner.Name From eb63157bb3c360156483025f999ecf2ce61d52fa Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Fri, 2 Sep 2022 16:24:36 +0200 Subject: [PATCH 2/5] Reduce total timeout, remove TODO Signed-off-by: Kevin Earls --- pkg/instrumentation/sdk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 95c220c4af..55c04ac29d 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -295,7 +295,7 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns // parent of ReplicaSet is e.g. Deployment which we are interested to know rs := appsv1.ReplicaSet{} nsn := types.NamespacedName{Namespace: ns.Name, Name: owner.Name} - backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 30 * time.Second} // TODO decide which of these we need and what they should be set to + backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 10 * time.Second} checkError := func(err error) bool { // if the error looks like 'ReplicaSet.apps "my-deployment-with-sidecar-f46b479f" not found' ignore it From 3f6fd89682c7043e8c8401ce84478cba3284a627 Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Fri, 2 Sep 2022 17:39:13 +0200 Subject: [PATCH 3/5] Use apierrors to check error Signed-off-by: Kevin Earls --- pkg/instrumentation/sdk.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 55c04ac29d..2b5ee6f8bd 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -27,6 +27,7 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.7.0" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" @@ -247,7 +248,7 @@ func chooseServiceName(pod corev1.Pod, resources map[string]string, index int) s // User defined attributes (in explicitly set env var) have higher precedence. func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.Instrumentation, ns corev1.Namespace, pod corev1.Pod, index int) map[string]string { // get existing resources env var and parse it into a map - existingResourceMap := map[string]bool{} + existingRes := map[string]bool{} existingResourceEnvIdx := getIndexOfEnv(pod.Spec.Containers[index].Env, constants.EnvOTELResourceAttrs) if existingResourceEnvIdx > -1 { existingResArr := strings.Split(pod.Spec.Containers[index].Env[existingResourceEnvIdx].Value, ",") @@ -256,14 +257,14 @@ func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.I if len(keyValueArr) != 2 { continue } - existingResourceMap[keyValueArr[0]] = true + existingRes[keyValueArr[0]] = true } } - resourceMap := map[string]string{} + res := map[string]string{} for k, v := range otelinst.Spec.Resource.Attributes { - if !existingResourceMap[k] { - resourceMap[k] = v + if !existingRes[k] { + res[k] = v } } @@ -277,11 +278,11 @@ func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.I k8sResources[semconv.K8SNodeNameKey] = pod.Spec.NodeName i.addParentResourceLabels(ctx, otelinst.Spec.Resource.AddK8sUIDAttributes, ns, pod.ObjectMeta, k8sResources) for k, v := range k8sResources { - if !existingResourceMap[string(k)] && v != "" { - resourceMap[string(k)] = v + if !existingRes[string(k)] && v != "" { + res[string(k)] = v } } - return resourceMap + return res } func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns corev1.Namespace, objectMeta metav1.ObjectMeta, resources map[attribute.Key]string) { @@ -299,7 +300,7 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns checkError := func(err error) bool { // if the error looks like 'ReplicaSet.apps "my-deployment-with-sidecar-f46b479f" not found' ignore it - if strings.HasPrefix(err.Error(), "ReplicaSet.apps") && strings.HasSuffix(err.Error(), "not found") { + if apierrors.IsNotFound(err) { return true } return false From abe29d6215958e091cf3dcca4eb4a61ad98b069e Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Fri, 2 Sep 2022 17:43:45 +0200 Subject: [PATCH 4/5] Appease the linter Signed-off-by: Kevin Earls --- pkg/instrumentation/sdk.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 2b5ee6f8bd..819b209191 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -299,11 +299,7 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 10 * time.Second} checkError := func(err error) bool { - // if the error looks like 'ReplicaSet.apps "my-deployment-with-sidecar-f46b479f" not found' ignore it - if apierrors.IsNotFound(err) { - return true - } - return false + return apierrors.IsNotFound(err) } getReplicaSet := func() error { From 480a2707d1bdd416d33c517e580036e0d17e449e Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Mon, 5 Sep 2022 09:35:19 +0200 Subject: [PATCH 5/5] Lower meximum wait time to 2 seconds Signed-off-by: Kevin Earls --- pkg/instrumentation/sdk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 819b209191..346fc77681 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -296,7 +296,7 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns // parent of ReplicaSet is e.g. Deployment which we are interested to know rs := appsv1.ReplicaSet{} nsn := types.NamespacedName{Namespace: ns.Name, Name: owner.Name} - backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 10 * time.Second} + backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 2 * time.Second} checkError := func(err error) bool { return apierrors.IsNotFound(err)