Skip to content

Commit

Permalink
pillar: configure GOGC based on pillar memory limit or global config
Browse files Browse the repository at this point in the history
This patch introduces two settings for the Golang runtime that
affect garbage collector behavior:

1. `gogc.memory.limit.bytes` provides the runtime with a soft memory
    limit.  The runtime undertakes several processes to try to respect
    this memory limit, including adjustments to the frequency of
    garbage collections and returning memory to the underlying system
    more aggressively. The Go API call is described here:
    https://pkg.go.dev/runtime/debug#SetMemoryLimit

    By default, EVE setting is disabled (set to 0), meaning the Golang
    runtime memory limit will be set according to the following
    equation based on the `memory.limit_in_bytes` hard memory limit
    provided by the pillar `cgroups`:

    `limit = memory.limit_in_bytes * 0.6`

    The constant 0.6 was chosen empirically and is explained by simple
    logic: `memory.limit_in_bytes` is a hard limit for the whole
    pillar cgroup, meaning when reached, likely one of the processes
    will be killed by OOM. In turn Golang runtime memory limit is a
    soft limit, so the difference must be significant to ensure that
    after the soft limit is reached, there will be enough memory for
    the Go garbage collector to do its job and, hopefully, not to
    hit the hard limit.

2. `gogc.percent` sets the garbage collection target percentage: a
    collection is triggered when the ratio of freshly allocated data
    to live data remaining after the previous collection reaches this
    percentage. The Go API call is described here:
    https://pkg.go.dev/runtime/debug#SetGCPercent

The patch is motivated by a frequently observed bloated `zedbox`
application (up to 500MB) that causes an OOM kill call to the /eve or
/pillar cgroups. It is assumed that the bloated `zedbox` application
is not caused by memory leaks, but by a delayed GC sweep cycle and an
unconditionally growing runtime heap size. An explicit memory limit
set for the Golang runtime (~400MB in the current version of EVE)
should make the GC more aggressive when the soft memory limit is hit,
which should result in a significant reduction in allocated but unused
memory.

Signed-off-by: Roman Penyaev <r.peniaev@gmail.com>
  • Loading branch information
rouming committed Sep 23, 2024
1 parent 6d403aa commit 40feb16
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 1 deletion.
12 changes: 12 additions & 0 deletions pkg/pillar/cmd/zedmanager/zedmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -1422,6 +1422,17 @@ func quantifyChanges(config types.AppInstanceConfig, oldConfig types.AppInstance
return needPurge, needRestart, purgeReason, restartReason
}

// configureGOGC reads the GOGC memory limit and GC percent settings from
// the global configuration and hands them to types.ConfigureGOGC.
// A failure is only logged as a warning; on success the configured values
// are logged together with the previous values returned by
// types.ConfigureGOGC.
func configureGOGC(gcp *types.ConfigItemValueMap) {
	memLimit := gcp.GlobalValueInt(types.GOGCMemoryLimitInBytes)
	gcPercent := gcp.GlobalValueInt(types.GOGCPercent)

	prevLimit, prevPercent, err := types.ConfigureGOGC(int64(memLimit), int(gcPercent))
	if err != nil {
		log.Warningf("configureGOGC: failed '%v'", err)
		return
	}
	// Note: the values logged as "set to" are the configured ones, not
	// necessarily what the callee ended up applying.
	log.Functionf("configureGOGC: memory limit set to '%v' (previous '%v'), GC percent set to '%v' (previous '%v')",
		memLimit, prevLimit, gcPercent, prevPercent)
}

func handleGlobalConfigCreate(ctxArg interface{}, key string,
statusArg interface{}) {
handleGlobalConfigImpl(ctxArg, key, statusArg)
Expand All @@ -1447,6 +1458,7 @@ func handleGlobalConfigImpl(ctxArg interface{}, key string,
ctx.globalConfig = gcp
ctx.GCInitialized = true
}
configureGOGC(gcp)
log.Functionf("handleGlobalConfigImpl done for %s", key)
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/pillar/types/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ const (
EveMemoryLimitInBytes GlobalSettingKey = "memory.eve.limit.bytes"
// How much memory overhead is allowed for VMM needs
VmmMemoryLimitInMiB GlobalSettingKey = "memory.vmm.limit.MiB"
// GOGCMemoryLimitInBytes global setting key
GOGCMemoryLimitInBytes GlobalSettingKey = "gogc.memory.limit.bytes"
// GOGCPercent global setting key
GOGCPercent GlobalSettingKey = "gogc.percent"
// IgnoreMemoryCheckForApps global setting key
IgnoreMemoryCheckForApps GlobalSettingKey = "memory.apps.ignore.check"
// IgnoreDiskCheckForApps global setting key
Expand Down Expand Up @@ -892,7 +896,10 @@ func NewConfigItemSpecMap() ConfigItemSpecMap {
100*1024*1024, 0xFFFFFFFF)
configItemSpecMap.AddIntItem(StorageZfsReserved, 20, 1, 99)
configItemSpecMap.AddIntItem(ForceFallbackCounter, 0, 0, 0xFFFFFFFF)

// Default GOGC memory limit is 0
configItemSpecMap.AddIntItem(GOGCMemoryLimitInBytes, 0, 0, 0xFFFFFFFF)
// Default GOGC target percentage is 100, 0 means disable GC
configItemSpecMap.AddIntItem(GOGCPercent, 100, 0, 500)
configItemSpecMap.AddIntItem(EveMemoryLimitInBytes, uint32(eveMemoryLimitInBytes),
uint32(eveMemoryLimitInBytes), 0xFFFFFFFF)
// Limit manual vmm overhead override to 1 PiB
Expand Down
2 changes: 2 additions & 0 deletions pkg/pillar/types/global_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ func TestNewConfigItemSpecMap(t *testing.T) {
ConsoleAccess,
VncShimVMAccess,
AllowAppVnc,
GOGCMemoryLimitInBytes,
GOGCPercent,
EveMemoryLimitInBytes,
VmmMemoryLimitInMiB,
IgnoreMemoryCheckForApps,
Expand Down
2 changes: 2 additions & 0 deletions pkg/pillar/types/locationconsts.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ const (
NewlogUploadAppDir = NewlogDir + "/appUpload"
// NewlogKeepSentQueueDir - a circular queue of gzip files already been sent
NewlogKeepSentQueueDir = NewlogDir + "/keepSentQueue"
// PillarHardMemoryLimitFile - hard memory limit of the pillar cgroup
PillarHardMemoryLimitFile = "/hostfs/sys/fs/cgroup/memory/eve/services/pillar/memory.limit_in_bytes"
// EveMemoryLimitFile - stores memory reserved for eve
EveMemoryLimitFile = "/hostfs/sys/fs/cgroup/memory/eve/memory.soft_limit_in_bytes"
// EveMemoryUsageFile - current usage
Expand Down
41 changes: 41 additions & 0 deletions pkg/pillar/types/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@
package types

import (
"fmt"
"os"
"runtime/debug"
"strconv"
"strings"
)

// GetPillarHardMemoryLimitInBytes returns the value stored in
// PillarHardMemoryLimitFile, i.e. the hard memory limit of the pillar
// cgroup in bytes. Any error reported by readUint64File is returned to
// the caller unchanged.
func GetPillarHardMemoryLimitInBytes() (uint64, error) {
	hardLimit, err := readUint64File(PillarHardMemoryLimitFile)
	return hardLimit, err
}

// GetEveMemoryLimitInBytes returns memory limit
// reserved for eve in bytes
func GetEveMemoryLimitInBytes() (uint64, error) {
Expand Down Expand Up @@ -42,3 +50,36 @@ func readUint64File(filename string) (uint64, error) {
dataUint64, err := strconv.ParseUint(dataString, 10, 64)
return dataUint64, err
}

// ConfigureGOGC sets two main configuration parameters for the
// garbage collector (GOGC): memory limit and percentage (see
// explanation here: https://tip.golang.org/doc/gc-guide).
//
// If limit is 0, the GOGC soft limit is derived from the pillar cgroup
// hard memory limit (60% of it). If percent is 0, the percentage-based
// GC trigger is disabled by passing -1 to debug.SetGCPercent.
//
// On success the previous memory limit and the previous GC percent, as
// returned by the runtime, are handed back. If the pillar hard limit
// cannot be read, nothing is changed and (-1, -1, err) is returned.
func ConfigureGOGC(limit int64, percent int) (int64, int, error) {
	if limit == 0 {
		// Fallback to value from cgroups if no limit in the configuration
		hardLimit, err := GetPillarHardMemoryLimitInBytes()
		if err != nil {
			return -1, -1, fmt.Errorf("can't receive pillar memory hard limit: '%w'", err)
		}
		limit = gogcSoftLimitFromHardLimit(hardLimit)
	}
	if percent == 0 {
		// Disable GC
		percent = -1
	}
	// Set new and retrieve previous values
	prevLimit := debug.SetMemoryLimit(limit)
	prevPercent := debug.SetGCPercent(percent)

	return prevLimit, prevPercent, nil
}

// gogcSoftLimitFromHardLimit reduces the cgroup hard limit to 0.6 of its
// value. The logic behind the constant is simple: the cgroup limit is a
// hard limit for the whole pillar cgroup, meaning when reached, we are
// killed by OOM. In turn the GOGC memory limit is a soft limit, so the
// difference must be significant to ensure that after the soft limit is
// reached, there will be enough memory for the GC to do its job and,
// hopefully, not to hit the hard limit.
//
// The computation is done in uint64 as floor(hardLimit*3/5) without
// forming the intermediate product hardLimit*600, which would overflow
// int64 for very large values (e.g. the "unlimited" value reported by
// cgroups). The result is clamped to the maximum int64.
func gogcSoftLimitFromHardLimit(hardLimit uint64) int64 {
	const maxInt64 = 1<<63 - 1

	// floor(3*h/5) == 3*(h/5) + floor(3*(h%5)/5); neither term can overflow.
	soft := hardLimit/5*3 + hardLimit%5*3/5
	if soft > maxInt64 {
		return maxInt64
	}
	return int64(soft)
}

0 comments on commit 40feb16

Please sign in to comment.