Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ignoreResourceUpdates to reduce controller CPU usage (#13534) #13912

Merged
merged 38 commits into from
Jun 25, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
bbfe176
feat: ignore watched resource update
agaudreault Jun 5, 2023
76ded23
add doc and CLI
agaudreault Jun 6, 2023
6cb5336
update doc index
agaudreault Jun 6, 2023
d059754
add command
agaudreault Jun 6, 2023
f9b02ea
Merge remote-tracking branch 'upstream/master' into reduce-object-rec…
agaudreault Jun 6, 2023
e0aa9b0
codegen
agaudreault Jun 6, 2023
b0b42cc
revert formatting
agaudreault Jun 6, 2023
c297319
do not skip on health change
agaudreault Jun 7, 2023
ef7d0f5
update doc
agaudreault Jun 7, 2023
ed10e74
Merge branch 'master' into reduce-object-reconcile
agaudreault Jun 7, 2023
e1d662b
Merge branch 'master' into reduce-object-reconcile
agaudreault Jun 12, 2023
704c880
Merge branch 'master' into reduce-object-reconcile
agaudreault Jun 14, 2023
11d6b9f
update logging to use context
agaudreault Jun 14, 2023
f5e5160
Merge branch 'reduce-object-reconcile' of github.com:agaudreault-jive…
agaudreault Jun 14, 2023
f7b6a04
fix typos. local build broken...
agaudreault Jun 15, 2023
5e0691d
Merge branch 'master' into reduce-object-reconcile
agaudreault Jun 20, 2023
89dca7d
change after review
agaudreault Jun 22, 2023
ce21a01
manifestHash to string
agaudreault Jun 22, 2023
6becaa1
more doc
agaudreault Jun 22, 2023
0621a5a
example for argoproj Application
agaudreault Jun 22, 2023
0c2eaca
add unit test for ignored logs
agaudreault Jun 22, 2023
95e8407
Merge remote-tracking branch 'upstream/master' into reduce-object-rec…
agaudreault Jun 22, 2023
b567f10
codegen
agaudreault Jun 22, 2023
7df1c2c
Update docs/operator-manual/reconcile.md
agaudreault Jun 22, 2023
2f3e31d
move hash and set log to debug
agaudreault Jun 22, 2023
1021114
Merge branch 'reduce-object-reconcile' of github.com:agaudreault-jive…
agaudreault Jun 22, 2023
4898bf7
Merge remote-tracking branch 'origin/master' into reduce-object-recon…
crenshaw-dev Jun 23, 2023
cc83ebc
Update util/settings/settings.go
agaudreault Jun 23, 2023
302e253
Update util/settings/settings.go
agaudreault Jun 23, 2023
1a28671
feature flag
crenshaw-dev Jun 23, 2023
1f8bc2f
fix
crenshaw-dev Jun 23, 2023
708ca5e
less aggressive managedFields ignore rule
crenshaw-dev Jun 23, 2023
b608ca7
Update docs/operator-manual/reconcile.md
crenshaw-dev Jun 23, 2023
e548727
use local settings
crenshaw-dev Jun 23, 2023
b5b502b
latest settings
crenshaw-dev Jun 23, 2023
9df66d9
safety first
crenshaw-dev Jun 23, 2023
ba0134d
Merge pull request #1 from crenshaw-dev/reduce-object-reconcile-flag
crenshaw-dev Jun 23, 2023
214cc53
since it's behind a feature flag, go aggressive on overrides
crenshaw-dev Jun 25, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions USERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ Currently, the following organizations are **officially** using Argo CD:
1. [Glovo](https://www.glovoapp.com)
1. [GMETRI](https://gmetri.com/)
1. [Gojek](https://www.gojek.io/)
1. [GoTo](https://www.goto.com/)
1. [GoTo Financial](https://gotofinancial.com/)
1. [Greenpass](https://www.greenpass.com.br/)
1. [Gridfuse](https://gridfuse.com/)
Expand Down
3 changes: 3 additions & 0 deletions assets/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -8003,6 +8003,9 @@
"ignoreDifferences": {
"$ref": "#/definitions/v1alpha1OverrideIgnoreDiff"
},
"ignoreResourceUpdates": {
"$ref": "#/definitions/v1alpha1OverrideIgnoreDiff"
},
"knownTypeFields": {
"type": "array",
"items": {
Expand Down
72 changes: 72 additions & 0 deletions cmd/argocd/commands/admin/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ func NewResourceOverridesCommand(cmdCtx commandContext) *cobra.Command {
},
}
command.AddCommand(NewResourceIgnoreDifferencesCommand(cmdCtx))
command.AddCommand(NewResourceIgnoreResourceUpdatesCommand(cmdCtx))
command.AddCommand(NewResourceActionListCommand(cmdCtx))
command.AddCommand(NewResourceActionRunCommand(cmdCtx))
command.AddCommand(NewResourceHealthCommand(cmdCtx))
Expand Down Expand Up @@ -380,6 +381,31 @@ func executeResourceOverrideCommand(ctx context.Context, cmdCtx commandContext,
callback(res, override, overrides)
}

// executeIgnoreResourceUpdatesOverrideCommand reads the resource manifest stored at args[0],
// looks up the ignoreResourceUpdates override registered for its group/kind and passes the
// resource, that override and the complete override map to callback.
//
// Every error encountered along the way is handed to errors.CheckError. When no override is
// registered for the resource a message is printed and callback is not invoked.
// Callers are expected to have verified that args is non-empty.
func executeIgnoreResourceUpdatesOverrideCommand(ctx context.Context, cmdCtx commandContext, args []string, callback func(res unstructured.Unstructured, override v1alpha1.ResourceOverride, overrides map[string]v1alpha1.ResourceOverride)) {
	manifest, err := os.ReadFile(args[0])
	errors.CheckError(err)

	var res unstructured.Unstructured
	err = yaml.Unmarshal(manifest, &res)
	errors.CheckError(err)

	mgr, err := cmdCtx.createSettingsManager(ctx)
	errors.CheckError(err)

	overrides, err := mgr.GetIgnoreResourceUpdatesOverrides()
	errors.CheckError(err)

	// Overrides are keyed by "group/kind", or by the bare kind when the group is empty.
	gvk := res.GroupVersionKind()
	var lookupKey string
	if gvk.Group == "" {
		lookupKey = gvk.Kind
	} else {
		lookupKey = fmt.Sprintf("%s/%s", gvk.Group, gvk.Kind)
	}

	if override, found := overrides[lookupKey]; found {
		callback(res, override, overrides)
		return
	}
	_, _ = fmt.Printf("No overrides configured for '%s/%s'\n", gvk.Group, gvk.Kind)
}

func NewResourceIgnoreDifferencesCommand(cmdCtx commandContext) *cobra.Command {
var command = &cobra.Command{
Use: "ignore-differences RESOURCE_YAML_PATH",
Expand Down Expand Up @@ -430,6 +456,52 @@ argocd admin settings resource-overrides ignore-differences ./deploy.yaml --argo
return command
}

// NewResourceIgnoreResourceUpdatesCommand builds the "ignore-resource-updates" command.
//
// The command takes the path of a resource manifest as its first argument; when it is missing
// the help text is shown and the process exits with status 1. Otherwise the manifest is copied,
// the copy is run through the normalizer built from the ignoreResourceUpdates overrides, and
// the diff between the original and the normalized copy is printed.
func NewResourceIgnoreResourceUpdatesCommand(cmdCtx commandContext) *cobra.Command {
	return &cobra.Command{
		Use:   "ignore-resource-updates RESOURCE_YAML_PATH",
		Short: "Renders fields excluded from resource updates",
		Long:  "Renders ignored fields using the 'ignoreResourceUpdates' setting specified in the 'resource.customizations' field of 'argocd-cm' ConfigMap",
		Example: `
argocd admin settings resource-overrides ignore-resource-updates ./deploy.yaml --argocd-cm-path ./argocd-cm.yaml`,
		Run: func(c *cobra.Command, args []string) {
			if len(args) < 1 {
				c.HelpFunc()(c, args)
				os.Exit(1)
			}

			executeIgnoreResourceUpdatesOverrideCommand(c.Context(), cmdCtx, args, func(res unstructured.Unstructured, override v1alpha1.ResourceOverride, overrides map[string]v1alpha1.ResourceOverride) {
				// Nothing to render when the override carries neither JSON pointers nor JQ paths.
				ruleCount := len(override.IgnoreResourceUpdates.JSONPointers) + len(override.IgnoreResourceUpdates.JQPathExpressions)
				if ruleCount == 0 {
					gvk := res.GroupVersionKind()
					_, _ = fmt.Printf("Ignore resource updates are not configured for '%s/%s'\n", gvk.Group, gvk.Kind)
					return
				}

				normalizer, err := normalizers.NewIgnoreNormalizer(nil, overrides)
				errors.CheckError(err)

				// Normalize a copy so the untouched resource stays available for the diff.
				stripped := res.DeepCopy()
				captured := collectLogs(func() {
					normalizeErr := normalizer.Normalize(stripped)
					errors.CheckError(normalizeErr)
				})
				if captured != "" {
					_, _ = fmt.Println(captured)
				}

				if !reflect.DeepEqual(&res, stripped) {
					_, _ = fmt.Printf("Following fields are ignored:\n\n")
					_ = cli.PrintDiff(res.GetName(), &res, stripped)
					return
				}
				_, _ = fmt.Printf("No fields are ignored by ignoreResourceUpdates settings: \n%s\n", override.IgnoreResourceUpdates)
			})
		},
	}
}

func NewResourceHealthCommand(cmdCtx commandContext) *cobra.Command {
var command = &cobra.Command{
Use: "health RESOURCE_YAML_PATH",
Expand Down
23 changes: 12 additions & 11 deletions controller/appcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,17 +359,18 @@ func (ctrl *ApplicationController) handleObjectUpdated(managedByApp map[string]b
level = CompareWithRecent
}

// Additional check for debug level so we don't need to evaluate the
// format string in case of non-debug scenarios
if log.GetLevel() >= log.DebugLevel {
var resKey string
if ref.Namespace != "" {
resKey = ref.Namespace + "/" + ref.Name
} else {
resKey = "(cluster-scoped)/" + ref.Name
}
log.Debugf("Refreshing app %s for change in cluster of object %s of type %s/%s", appKey, resKey, ref.APIVersion, ref.Kind)
}
namespace := ref.Namespace
if ref.Namespace == "" {
namespace = "(cluster-scoped)"
}
log.WithFields(log.Fields{
"application": appKey,
"level": level,
"namespace": namespace,
"name": ref.Name,
"api-version": ref.APIVersion,
"kind": ref.Kind,
crenshaw-dev marked this conversation as resolved.
Show resolved Hide resolved
}).Debug("Requesting app refresh caused by object update")

ctrl.requestAppRefresh(app.QualifiedName(), &level, nil)
}
Expand Down
63 changes: 62 additions & 1 deletion controller/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/client-go/tools/cache"

"github.com/argoproj/argo-cd/v2/controller/metrics"
"github.com/argoproj/argo-cd/v2/pkg/apis/application"
appv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
"github.com/argoproj/argo-cd/v2/util/argo"
"github.com/argoproj/argo-cd/v2/util/db"
Expand Down Expand Up @@ -149,6 +150,8 @@ type ResourceInfo struct {
PodInfo *PodInfo
// NodeInfo is available for nodes only
NodeInfo *NodeInfo

manifestHash string
}

func NewLiveStateCache(
Expand Down Expand Up @@ -178,6 +181,8 @@ type cacheSettings struct {
clusterSettings clustercache.Settings
appInstanceLabelKey string
trackingMethod appv1.TrackingMethod
// resourceOverrides holds the ignoreResourceUpdates overrides used to decide which watched resource updates can be ignored
resourceOverrides map[string]appv1.ResourceOverride
}

type liveStateCache struct {
Expand All @@ -200,6 +205,10 @@ func (c *liveStateCache) loadCacheSettings() (*cacheSettings, error) {
if err != nil {
return nil, err
}
resourceUpdatesOverrides, err := c.settingsMgr.GetIgnoreResourceUpdatesOverrides()
if err != nil {
return nil, err
agaudreault marked this conversation as resolved.
Show resolved Hide resolved
}
resourcesFilter, err := c.settingsMgr.GetResourcesFilter()
if err != nil {
return nil, err
Expand All @@ -212,7 +221,8 @@ func (c *liveStateCache) loadCacheSettings() (*cacheSettings, error) {
ResourceHealthOverride: lua.ResourceHealthOverrides(resourceOverrides),
ResourcesFilter: resourcesFilter,
}
return &cacheSettings{clusterSettings, appInstanceLabelKey, argo.GetTrackingMethod(c.settingsMgr)}, nil

return &cacheSettings{clusterSettings, appInstanceLabelKey, argo.GetTrackingMethod(c.settingsMgr), resourceUpdatesOverrides}, nil
}

func asResourceNode(r *clustercache.Resource) appv1.ResourceNode {
Expand Down Expand Up @@ -309,6 +319,27 @@ func skipAppRequeuing(key kube.ResourceKey) bool {
return ignoredRefreshResources[key.Group+"/"+key.Kind]
}

// skipResourceUpdate reports whether an update from oldInfo to newInfo can be ignored.
//
// It returns true only when both infos are present, both carry the same non-empty manifest
// hash, and their health is unchanged (both have no health, or both have the same status).
func skipResourceUpdate(oldInfo, newInfo *ResourceInfo) bool {
	if oldInfo == nil || newInfo == nil {
		return false
	}

	// A missing hash never counts as a match, even when both sides are empty.
	if oldInfo.manifestHash == "" || oldInfo.manifestHash != newInfo.manifestHash {
		return false
	}

	switch {
	case oldInfo.Health == nil && newInfo.Health == nil:
		return true
	case oldInfo.Health == nil || newInfo.Health == nil:
		// Health appeared or disappeared between the two versions.
		return false
	default:
		return oldInfo.Health.Status == newInfo.Health.Status
	}
}

// shouldHashManifest validates if the API resource needs to be hashed.
// If there's an app name from resource tracking, or if this is itself an app, we should generate a hash.
// Otherwise, the hashing should be skipped to save CPU time.
func shouldHashManifest(appName string, gvk schema.GroupVersionKind) bool {
// Only hash if the resource belongs to an app.
// Best - Only hash for resources that are part of an app or their dependencies
// (current) - Only hash for resources that are part of an app + all apps that might be from an ApplicationSet
// Orphan - If orphan is enabled, hash should be made on all resource of that namespace and a config to disable it
// Worst - Hash all resources watched by Argo
return appName != "" || (gvk.Group == application.Group && gvk.Kind == application.ApplicationKind)
agaudreault marked this conversation as resolved.
Show resolved Hide resolved
}

// isRetryableError is a helper method to see whether an error
// returned from the dynamic client is potentially retryable.
func isRetryableError(err error) bool {
Expand Down Expand Up @@ -424,6 +455,7 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e
c.lock.RLock()
cacheSettings := c.cacheSettings
c.lock.RUnlock()

res.Health, _ = health.GetResourceHealth(un, cacheSettings.clusterSettings.ResourceHealthOverride)

appName := c.resourceTracking.GetAppName(un, cacheSettings.appInstanceLabelKey, cacheSettings.trackingMethod)
Expand All @@ -432,6 +464,15 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e
}
gvk := un.GroupVersionKind()

if shouldHashManifest(appName, gvk) {
hash, err := generateManifestHash(un, nil, cacheSettings.resourceOverrides)
if err != nil {
log.Errorf("Failed to generate manifest hash: %v", err)
} else {
res.manifestHash = hash
}
}

// edge case. we do not label CRDs, so they miss the tracking label we inject. But we still
// want the full resource to be available in our cache (to diff), so we store all CRDs
return res, res.AppName != "" || gvk.Kind == kube.CustomResourceDefinitionKind
Expand All @@ -450,6 +491,26 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e
} else {
ref = oldRes.Ref
}

if oldRes != nil && newRes != nil && skipResourceUpdate(resInfo(oldRes), resInfo(newRes)) {
// Additional check for debug level so we don't need to evaluate the
// format string in case of non-debug scenarios
if log.GetLevel() >= log.DebugLevel {
namespace := ref.Namespace
if ref.Namespace == "" {
namespace = "(cluster-scoped)"
}
log.WithFields(log.Fields{
"server": clusterCache.GetClusterInfo().Server,
"namespace": namespace,
"name": ref.Name,
"api-version": ref.APIVersion,
"kind": ref.Kind,
agaudreault marked this conversation as resolved.
Show resolved Hide resolved
}).Debug("Ignoring change of object because none of the watched resource fields have changed")
}
return
}

for _, r := range []*clustercache.Resource{newRes, oldRes} {
if r == nil {
continue
Expand Down
Loading