Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow configuring nginx worker reload behaviour, to prevent multiple concurrent worker reloads which can lead to high resource usage and OOMKill #10884

Merged
1 change: 1 addition & 0 deletions charts/ingress-nginx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ As of version `1.26.0` of this chart, by simply not providing any clusterIP valu
| controller.electionTTL | string | `""` | Duration a leader election is valid before it's getting re-elected, e.g. `15s`, `10m` or `1h`. (Default: 30s) |
| controller.enableAnnotationValidations | bool | `false` | |
| controller.enableMimalloc | bool | `true` | Enable mimalloc as a drop-in replacement for malloc. # ref: https://github.com/microsoft/mimalloc # |
| controller.enableSerialReloads | bool | `false` | This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued |
| controller.enableTopologyAwareRouting | bool | `false` | This configuration enables Topology Aware Routing feature, used together with service annotation service.kubernetes.io/topology-mode="auto" Defaults to false |
| controller.existingPsp | string | `""` | Use an existing PSP instead of creating one |
| controller.extraArgs | object | `{}` | Additional command line arguments to pass to Ingress-Nginx Controller E.g. to specify the default SSL certificate you can use |
Expand Down
1 change: 1 addition & 0 deletions charts/ingress-nginx/templates/controller-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ metadata:
namespace: {{ include "ingress-nginx.namespace" . }}
data:
allow-snippet-annotations: "{{ .Values.controller.allowSnippetAnnotations }}"
enable-serial-reloads: "{{ .Values.controller.enableSerialReloads }}"
{{- if .Values.controller.addHeaders }}
add-headers: {{ include "ingress-nginx.namespace" . }}/{{ include "ingress-nginx.fullname" . }}-custom-add-headers
{{- end }}
Expand Down
2 changes: 2 additions & 0 deletions charts/ingress-nginx/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ controller:
# when users add those annotations.
# Global snippets in ConfigMap are still respected
allowSnippetAnnotations: false
# -- This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued
enableSerialReloads: false
rsafonseca marked this conversation as resolved.
Show resolved Hide resolved
# -- Required for use with CNI based kubernetes installations (such as ones set up by kubeadm),
# since CNI and hostport don't mix yet. Can be deprecated once https://github.com/kubernetes/kubernetes/issues/23920
# is merged
Expand Down
1 change: 1 addition & 0 deletions docs/user-guide/nginx-configuration/configmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ The following table shows a configuration option's name, type, and the default v
|[worker-processes](#worker-processes)| string | `<Number of CPUs>` ||
|[worker-cpu-affinity](#worker-cpu-affinity)| string | "" ||
|[worker-shutdown-timeout](#worker-shutdown-timeout)| string | "240s" ||
|[enable-serial-reloads](#enable-serial-reloads)|bool|"false"||
|[load-balance](#load-balance)| string | "round_robin" ||
|[variables-hash-bucket-size](#variables-hash-bucket-size)| int | 128 ||
|[variables-hash-max-size](#variables-hash-max-size)| int | 2048 ||
Expand Down
8 changes: 8 additions & 0 deletions internal/ingress/controller/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ type Configuration struct {
// http://nginx.org/en/docs/ngx_core_module.html#worker_processes
WorkerProcesses string `json:"worker-processes,omitempty"`

// Defines whether multiple concurrent reloads of worker processes should occur.
// Set this to false to prevent more than n x 2 workers to exist at any time, to avoid potential OOM situations and high CPU load
// With this setting on false, configuration changes in the queue will be re-queued with an exponential backoff, until the number of worker process is the expected value.
// By default new worker processes are spawned every time there's a change that cannot be applied dynamically with no upper limit to the number of running workers
// http://nginx.org/en/docs/ngx_core_module.html#worker_processes
WorkerSerialReloads bool `json:"enable-serial-reloads,omitempty"`

// Defines a timeout for a graceful shutdown of worker processes
// http://nginx.org/en/docs/ngx_core_module.html#worker_shutdown_timeout
WorkerShutdownTimeout string `json:"worker-shutdown-timeout,omitempty"`
Expand Down Expand Up @@ -851,6 +858,7 @@ func NewDefault() Configuration {
UseGeoIP2: false,
GeoIP2AutoReloadMinutes: 0,
WorkerProcesses: strconv.Itoa(runtime.NumCPU()),
WorkerSerialReloads: false,
WorkerShutdownTimeout: "240s",
VariablesHashBucketSize: 256,
VariablesHashMaxSize: 2048,
Expand Down
47 changes: 44 additions & 3 deletions internal/ingress/controller/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"syscall"
"text/template"
"time"
"unicode"

proxyproto "github.com/armon/go-proxyproto"
"github.com/eapache/channels"
Expand Down Expand Up @@ -87,9 +88,10 @@ func NewNGINXController(config *Configuration, mc metric.Collector) *NGINXContro
n := &NGINXController{
isIPV6Enabled: ing_net.IsIPv6Enabled(),

resolver: h,
cfg: config,
syncRateLimiter: flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1),
resolver: h,
cfg: config,
syncRateLimiter: flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1),
workersReloading: false,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the default be set to config.WorkerSerialReloads

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. This is a state flag, not a config flag

Copy link
Contributor Author

@rsafonseca rsafonseca Mar 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The workersReloading flag controls the state of whether there is a reload going on, or not, and then if WorkerSerialReloads is enabled, and workersReloading is true, it will not start a new reload, but rather re-queue it, but if it is false, it will proceed with the reload as normal. From the start, there is no reload happening so this is set to false.


recorder: eventBroadcaster.NewRecorder(scheme.Scheme, apiv1.EventSource{
Component: "nginx-ingress-controller",
Expand Down Expand Up @@ -229,6 +231,8 @@ type NGINXController struct {

syncRateLimiter flowcontrol.RateLimiter

workersReloading bool

// stopLock is used to enforce that only a single call to Stop send at
// a given time. We allow stopping through an HTTP endpoint and
// allowing concurrent stoppers leads to stack traces.
Expand Down Expand Up @@ -673,6 +677,11 @@ Error: %v
//
//nolint:gocritic // the cfg shouldn't be changed, and shouldn't be mutated by other processes while being rendered.
func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error {
workerSerialReloads := n.store.GetBackendConfiguration().WorkerSerialReloads
if workerSerialReloads && n.workersReloading {
return errors.New("worker reload already in progress, requeuing reload")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be an error, or just a warning?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea of returning an error is to pick up on the existing behaviour on the queue, which will re-queue in case of error here

if err := t.sync(key); err != nil {

}

cfg := n.store.GetBackendConfiguration()
rsafonseca marked this conversation as resolved.
Show resolved Hide resolved
cfg.Resolver = n.resolver

Expand Down Expand Up @@ -738,9 +747,41 @@ func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error {
return fmt.Errorf("%v\n%v", err, string(o))
}

// Reload status checking runs in a separate goroutine to avoid blocking the sync queue
if workerSerialReloads {
go n.awaitWorkersReload()
}

return nil
}

// awaitWorkersReload checks if the number of workers has returned to the expected count
func (n *NGINXController) awaitWorkersReload() {
n.workersReloading = true
defer func() { n.workersReloading = false }()

expectedWorkers := n.store.GetBackendConfiguration().WorkerProcesses
var numWorkers string
klog.V(3).Infof("waiting for worker count to be equal to %s", expectedWorkers)
for numWorkers != expectedWorkers {
time.Sleep(time.Second)
o, err := exec.Command("/bin/sh", "-c", "pgrep worker | wc -l").Output()
rsafonseca marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
klog.ErrorS(err, numWorkers)
return
}
// cleanup any non-printable chars from shell output
numWorkers = strings.Map(func(r rune) rune {
if unicode.IsPrint(r) {
return r
}
return -1
}, string(o))

klog.V(3).Infof("Currently running nginx worker processes: %s, expected %s", numWorkers, expectedWorkers)
}
}

// nginxHashBucketSize computes the correct NGINX hash_bucket_size for a hash
// with the given longest key.
func nginxHashBucketSize(longestString int) int {
Expand Down
12 changes: 12 additions & 0 deletions internal/ingress/controller/template/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ const (
luaSharedDictsKey = "lua-shared-dicts"
plugins = "plugins"
debugConnections = "debug-connections"
workerSerialReloads = "enable-serial-reloads"
)

var (
Expand Down Expand Up @@ -404,6 +405,17 @@ func ReadConfig(src map[string]string) config.Configuration {
delete(conf, workerProcesses)
}

if val, ok := conf[workerSerialReloads]; ok {
boolVal, err := strconv.ParseBool(val)
if err != nil {
to.WorkerSerialReloads = false
klog.Warningf("failed to parse enable-serial-reloads setting, valid values are true or false, found %s", val)
} else {
to.WorkerSerialReloads = boolVal
}
delete(conf, workerSerialReloads)
}

if val, ok := conf[plugins]; ok {
to.Plugins = splitAndTrimSpace(val, ",")
delete(conf, plugins)
Expand Down