Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Controller logging improvements #1759

Merged
merged 1 commit into from
Aug 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hack/generated/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ spec:
containers:
- args:
- --enable-leader-election
- --v=2
image: controller:latest
name: manager
resources:
Expand Down
5 changes: 3 additions & 2 deletions hack/generated/controllers/generic_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/Azure/azure-service-operator/hack/generated/pkg/armclient"
"github.com/Azure/azure-service-operator/hack/generated/pkg/genruntime"
"github.com/Azure/azure-service-operator/hack/generated/pkg/genruntime/conditions"
. "github.com/Azure/azure-service-operator/hack/generated/pkg/logging"
"github.com/Azure/azure-service-operator/hack/generated/pkg/reconcilers"
"github.com/Azure/azure-service-operator/hack/generated/pkg/util/kubeclient"
)
Expand Down Expand Up @@ -121,7 +122,7 @@ func register(mgr ctrl.Manager, reconciledResourceLookup map[schema.GroupKind]sc
if err != nil {
return errors.Wrapf(err, "creating GVK for obj %T", obj)
}
log.V(4).Info("Registering", "GVK", gvk)
log.V(Status).Info("Registering", "GVK", gvk)

// TODO: Do we need to add any index fields here? DavidJ's controller index's status.id - see its usage
// TODO: of IndexField
Expand Down Expand Up @@ -198,7 +199,7 @@ func (gr *GenericReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
// (actually a reference or shallow copy) of an object, you'll mess up other controllers (not just your own).
obj = obj.DeepCopyObject().(client.Object)

gr.Log.V(1).Info(
gr.Log.V(Verbose).Info(
"Reconcile invoked",
"kind", fmt.Sprintf("%T", obj),
"resourceVersion", obj.GetResourceVersion(),
Expand Down
38 changes: 38 additions & 0 deletions hack/generated/pkg/logging/levels.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/

package logging

// These levels are defined to categorize (roughly) what a given log level means. They serve as guidelines
// to help decide what level a new log should be emitted at. They are not rules that must be followed.
// Keep in mind that some logs may be emitted at a level higher
// than Debug (the highest log level defined below). Levels between 4 and 10 are progressively
// more verbose and are rarely used.

const (
// Status logs are logs that should ALWAYS be shown. In the context of a controller,
// Status logs should not be emitted on every reconcile loop. Examples of good Status logs
// are state changes or major problems. These probably line up very nicely with interesting
// customer-facing "events" as well.
Status = 0

// Info logs are logs that are probably useful but may be slightly more verbose.
// In the context of a controller, an Info log probably shouldn't be emitted every time through
// the reconcile loop, at least in the happy path.
// Examples of good Info logs include intermittent errors which we expect to be able to retry through
// and object updates (think: "set ownerReference" or similar things which are not critical state changes
// but are still interesting updates that aren't super verbose).
Info = 1

// Verbose logs are logs that are quite verbose. In the context of a controller
// they likely log on each reconcile loop. Examples of good Verbose logs include
// "waiting for deployment..." or "waiting for deletion to complete..."
Verbose = 2

// Debug logs are logs that are extremely verbose and log each reconcile loop (or multiple times in a single
// reconcile loop). Examples include ARM request and response payloads, or request and response payloads
// from APIServer.
Debug = 3
)
61 changes: 30 additions & 31 deletions hack/generated/pkg/reconcilers/azure_deployment_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

"github.com/Azure/azure-service-operator/hack/generated/pkg/genruntime/conditions"
"github.com/Azure/azure-service-operator/hack/generated/pkg/ownerutil"

"github.com/Azure/azure-service-operator/hack/generated/pkg/armclient"
"github.com/Azure/azure-service-operator/hack/generated/pkg/genruntime"
"github.com/Azure/azure-service-operator/hack/generated/pkg/genruntime/conditions"
. "github.com/Azure/azure-service-operator/hack/generated/pkg/logging"
"github.com/Azure/azure-service-operator/hack/generated/pkg/ownerutil"
"github.com/Azure/azure-service-operator/hack/generated/pkg/reflecthelpers"
"github.com/Azure/azure-service-operator/hack/generated/pkg/util/kubeclient"
)
Expand Down Expand Up @@ -129,7 +129,7 @@ func (r *AzureDeploymentReconciler) CreateOrUpdate(ctx context.Context) (ctrl.Re
}

func (r *AzureDeploymentReconciler) Delete(ctx context.Context) (ctrl.Result, error) {
r.logObj("deleting resource")
r.logObj("reconciling resource")

action, actionFunc, err := r.DetermineDeleteAction()
if err != nil {
Expand Down Expand Up @@ -352,15 +352,15 @@ func (r *AzureDeploymentReconciler) DetermineCreateOrUpdateAction() (CreateOrUpd
if ready != nil {
conditionString = ready.String()
}
r.log.V(3).Info(
r.log.V(Verbose).Info(
"DetermineCreateOrUpdateAction",
"condition", conditionString,
"hasChanged", hasChanged,
"ongoingDeploymentID", ongoingDeploymentID)

if !hasChanged && r.InTerminalState() {
msg := fmt.Sprintf("Resource spec has not changed and resource has terminal Ready condition: %q", ready)
r.log.V(1).Info(msg)
msg := fmt.Sprintf("Nothing to do. Spec has not changed and resource has terminal Ready condition: %q.", ready)
r.log.V(Info).Info(msg)
return CreateOrUpdateActionNoAction, NoAction, nil
}

Expand All @@ -369,8 +369,6 @@ func (r *AzureDeploymentReconciler) DetermineCreateOrUpdateAction() (CreateOrUpd
}

if hasOngoingDeployment {
r.log.V(3).Info("Have existing deployment ID, will monitor it", "deploymentID", ongoingDeploymentID)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If something stalls and never makes progress, this log line might be the key to spotting the situation.
I suggest restoring it, but at the Verbose level.

Copy link
Member Author

@matthchr matthchr Aug 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this because we already log essentially the same information above on line 355, so this line was a duplicate. In fact, the line above is better because it also includes the current Ready condition and hasChanged.

// There is an ongoing deployment we need to monitor
return CreateOrUpdateActionMonitorDeployment, r.MonitorDeployment, nil
}

Expand Down Expand Up @@ -402,7 +400,7 @@ func NoAction(_ context.Context) (ctrl.Result, error) {
// marked with the provisioning state of "Deleting".
func (r *AzureDeploymentReconciler) StartDeleteOfResource(ctx context.Context) (ctrl.Result, error) {
msg := "Starting delete of resource"
r.log.Info(msg)
r.log.V(Status).Info(msg)
r.recorder.Event(r.obj, v1.EventTypeNormal, string(DeleteActionBeginDelete), msg)

// If we have no resourceID to begin with, the Azure resource was never created
Expand Down Expand Up @@ -453,8 +451,6 @@ func (r *AzureDeploymentReconciler) StartDeleteOfResource(ctx context.Context) (
// Note: We requeue here because we've only changed the status and status updates don't trigger another reconcile
// because we use predicate.GenerationChangedPredicate and predicate.AnnotationChangedPredicate
// delete has started, check back to seen when the finalizer can be removed
r.log.V(3).Info("Resource deletion started")

// Normally don't need to set both of these fields but because retryAfter can be 0 we do
return ctrl.Result{Requeue: true, RequeueAfter: retryAfter}, nil
}
Expand All @@ -463,7 +459,7 @@ func (r *AzureDeploymentReconciler) StartDeleteOfResource(ctx context.Context) (
// the finalizer will be removed.
func (r *AzureDeploymentReconciler) MonitorDelete(ctx context.Context) (ctrl.Result, error) {
msg := "Continue monitoring deletion"
r.log.V(1).Info(msg)
r.log.V(Verbose).Info(msg)
r.recorder.Event(r.obj, v1.EventTypeNormal, string(DeleteActionMonitorDelete), msg)

resource, err := r.constructArmResource(ctx)
Expand All @@ -475,15 +471,15 @@ func (r *AzureDeploymentReconciler) MonitorDelete(ctx context.Context) (ctrl.Res
found, retryAfter, err := r.ARMClient.HeadResource(ctx, resource.GetID(), resource.Spec().GetAPIVersion())
if err != nil {
if retryAfter != 0 {
r.log.V(3).Info("Error performing HEAD on resource, will retry", "delaySec", retryAfter/time.Second)
r.log.V(Info).Info("Error performing HEAD on resource, will retry", "delaySec", retryAfter/time.Second)
return ctrl.Result{RequeueAfter: retryAfter}, nil
}

return ctrl.Result{}, errors.Wrap(err, "head resource")
}

if found {
r.log.V(1).Info("Found resource: continuing to wait for deletion...")
r.log.V(Verbose).Info("Found resource: continuing to wait for deletion...")
return ctrl.Result{Requeue: true}, nil
}

Expand All @@ -499,7 +495,7 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.
return ctrl.Result{}, err
}

r.log.Info("Starting new deployment to Azure", "action", string(CreateOrUpdateActionBeginDeployment))
r.log.V(Status).Info("Starting new deployment to Azure")

// Update our state and commit BEFORE creating the Azure deployment in case
// we're not operating on the latest version of the object and the CommitUpdate fails
Expand Down Expand Up @@ -546,7 +542,7 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.
// to know. This could happen if Azure is returning a weird code (bug in Azure?) or if our list
// of error classifications is incomplete.
if ready.Severity != conditions.ConditionSeverityError {
r.log.V(0).Info(
r.log.V(Status).Info(
"BadRequest was misclassified as non-fatal error. Correcting it. This could be because of a bug, please report it",
"condition", ready.String())
ready.Severity = conditions.ConditionSeverityError
Expand Down Expand Up @@ -575,7 +571,7 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.
}
}
} else {
r.log.Info("Created deployment in Azure", "id", deployment.ID)
r.log.V(Status).Info("Created deployment in Azure", "id", deployment.ID)
r.recorder.Eventf(r.obj, v1.EventTypeNormal, string(CreateOrUpdateActionBeginDeployment), "Created new deployment to Azure with ID %q", deployment.ID)
}

Expand Down Expand Up @@ -636,6 +632,9 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
// If the deployment doesn't exist, clear our ID/Name and return so we can try again
var reqErr *autorestAzure.RequestError
if errors.As(err, &reqErr) && reqErr.StatusCode == http.StatusNotFound {
r.log.V(Info).Info(
"Deployment doesn't exist, clearing state and requeuing",
"id", deploymentID)
r.SetDeploymentID("")
r.SetDeploymentName("")
err = r.CommitUpdate(ctx)
Expand All @@ -650,14 +649,14 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
}

if retryAfter != 0 {
r.log.V(3).Info("Error performing GET on deployment, will retry", "delaySec", retryAfter/time.Second)
r.log.V(Info).Info("Error performing GET on deployment, will retry", "delaySec", retryAfter/time.Second)
return ctrl.Result{RequeueAfter: retryAfter}, nil
}

return ctrl.Result{}, errors.Wrapf(err, "getting deployment %q from ARM", deploymentID)
}

r.log.V(4).Info(
r.log.V(Verbose).Info(
"Monitoring deployment",
"action", string(CreateOrUpdateActionMonitorDeployment),
"id", deploymentID,
Expand All @@ -670,12 +669,12 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl

// If the deployment isn't done yet, there's nothing to do just bail out
if !deployment.IsTerminalProvisioningState() {
r.log.V(3).Info("Deployment still running")
r.log.V(Verbose).Info("Deployment still running")
return ctrl.Result{Requeue: true, RequeueAfter: retryAfter}, nil
}

// The deployment is in a terminal state - let's handle it
r.log.Info(
r.log.V(Status).Info(
"Deployment in terminal state",
"DeploymentID", deployment.ID,
"State", deployment.ProvisioningStateOrUnknown(),
Expand All @@ -687,7 +686,7 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
// because the Azure resource already exists). Since it's expected that this sequence of events is rare, we don't
// try to optimize for preventing it with some sort of two phase commit or anything.
// TODO: Create a unit test that forces this specific sequence of events
r.log.V(4).Info("Deleting deployment", "DeploymentID", deployment.ID)
r.log.V(Info).Info("Deleting deployment", "DeploymentID", deployment.ID)
_, err = r.ARMClient.DeleteDeployment(ctx, deployment.ID)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed deleting deployment %q", deployment.ID)
Expand All @@ -700,7 +699,7 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
}

func (r *AzureDeploymentReconciler) ManageOwnership(ctx context.Context) (ctrl.Result, error) {
r.log.V(1).Info("applying ownership", "action", CreateOrUpdateActionManageOwnership)
r.log.V(Info).Info("applying ownership", "action", CreateOrUpdateActionManageOwnership)
isOwnerReady, err := r.isOwnerReady(ctx)
if err != nil {
return ctrl.Result{}, err
Expand Down Expand Up @@ -760,13 +759,13 @@ func (r *AzureDeploymentReconciler) getStatus(ctx context.Context, id string) (g

// Get the resource
retryAfter, err := r.ARMClient.GetResource(ctx, id, deployableSpec.Spec().GetAPIVersion(), armStatus)
if r.log.V(4).Enabled() {
if r.log.V(Debug).Enabled() {
statusBytes, marshalErr := json.Marshal(armStatus)
if marshalErr != nil {
return nil, zeroDuration, errors.Wrapf(err, "serializing ARM status to JSON for debugging")
}

r.log.V(4).Info("Got ARM status", "status", string(statusBytes))
r.log.V(Debug).Info("Got ARM status", "status", string(statusBytes))
}

if err != nil {
Expand Down Expand Up @@ -838,12 +837,12 @@ func (r *AzureDeploymentReconciler) createDeployment(

// logObj logs the r.obj JSON payload
func (r *AzureDeploymentReconciler) logObj(note string) {
if r.log.V(4).Enabled() {
if r.log.V(Debug).Enabled() {
objJson, err := json.Marshal(r.obj)
if err != nil {
r.log.Error(err, "failed to JSON serialize obj for logging purposes")
} else {
r.log.V(4).Info(note, "resource", string(objJson))
r.log.V(Debug).Info(note, "resource", string(objJson))
}
}
}
Expand Down Expand Up @@ -883,7 +882,7 @@ func (r *AzureDeploymentReconciler) isOwnerReady(ctx context.Context) (bool, err
if err != nil {
var typedErr *genruntime.ReferenceNotFound
if errors.As(err, &typedErr) {
r.log.V(4).Info("Owner does not yet exist", "NamespacedName", typedErr.NamespacedName)
r.log.V(Info).Info("Owner does not yet exist", "NamespacedName", typedErr.NamespacedName)
return false, nil
}

Expand Down Expand Up @@ -913,7 +912,7 @@ func (r *AzureDeploymentReconciler) applyOwnership(ctx context.Context) error {
}

r.obj.SetOwnerReferences(ownerutil.EnsureOwnerRef(r.obj.GetOwnerReferences(), ownerRef))
r.log.V(4).Info("Set owner reference", "ownerGvk", ownerGvk, "ownerName", owner.GetName())
r.log.V(Info).Info("Set owner reference", "ownerGvk", ownerGvk, "ownerName", owner.GetName())
err = r.CommitUpdate(ctx)

if err != nil {
Expand All @@ -935,7 +934,7 @@ func (r *AzureDeploymentReconciler) deleteResourceSucceeded(ctx context.Context)
return err
}

r.log.V(0).Info("Deleted resource")
r.log.V(Status).Info("Deleted resource")
return nil
}

Expand Down