Skip to content

Commit

Permalink
Add retry loop for client.get of replicaset as that sometimes fails (#…
Browse files Browse the repository at this point in the history
…1072)

* Add retry loop for client.get of replicaset as that sometimes fails

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Reduce total timeout, remove TODO

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Use apierrors to check error

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Appease the linter

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Lower meximum wait time to 2 seconds

Signed-off-by: Kevin Earls <kearls@redhat.com>

Signed-off-by: Kevin Earls <kearls@redhat.com>
  • Loading branch information
kevinearls authored Sep 5, 2022
1 parent b753f56 commit 4655c15
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions pkg/instrumentation/sdk.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,19 @@ import (
"fmt"
"sort"
"strings"
"time"
"unsafe"

"github.com/go-logr/logr"
"go.opentelemetry.io/otel/attribute"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
Expand Down Expand Up @@ -291,12 +295,22 @@ func (i *sdkInjector) addParentResourceLabels(ctx context.Context, uid bool, ns
}
// parent of ReplicaSet is e.g. Deployment which we are interested to know
rs := appsv1.ReplicaSet{}
// ignore the error. The object might not exist, the error is not important, getting labels is just the best effort
//nolint:errcheck
i.client.Get(ctx, types.NamespacedName{
Namespace: ns.Name,
Name: owner.Name,
}, &rs)
nsn := types.NamespacedName{Namespace: ns.Name, Name: owner.Name}
backOff := wait.Backoff{Duration: 10 * time.Millisecond, Factor: 1.5, Jitter: 0.1, Steps: 20, Cap: 2 * time.Second}

checkError := func(err error) bool {
return apierrors.IsNotFound(err)
}

getReplicaSet := func() error {
return i.client.Get(ctx, nsn, &rs)
}

// use a retry loop to get the Deployment. A single call to client.get fails occasionally
err := retry.OnError(backOff, checkError, getReplicaSet)
if err != nil {
i.logger.Error(err, "failed to get replicaset", "replicaset", nsn.Name, "namespace", nsn.Namespace)
}
i.addParentResourceLabels(ctx, uid, ns, rs.ObjectMeta, resources)
case "deployment":
resources[semconv.K8SDeploymentNameKey] = owner.Name
Expand Down

0 comments on commit 4655c15

Please sign in to comment.