Skip to content

Commit

Permalink
Merge pull request #631 from l1b0k/fix/sysconf
Browse files Browse the repository at this point in the history
fix: sysconf may overrived
  • Loading branch information
BSWANG authored May 16, 2024
2 parents 9b7c916 + cd8201b commit 47301e6
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG TERWAY_POLICY_IMAGE=registry-cn-zhangjiakou.ack.aliyuncs.com/acs/terway:policy-f37c94d0@sha256:703688a58f1907e049a146eb44ca6af9fce8f1f20c3b0a048ef3bfcc081f0d91
ARG TERWAY_POLICY_IMAGE=registry-cn-zhangjiakou.ack.aliyuncs.com/acs/terway:policy-5a5ad9f7@sha256:2d1c55d6cc601b8e770221ff07d9d8466a7ed7f2c1ae175cf751485858226e8c
ARG UBUNTU_IMAGE=registry.cn-hangzhou.aliyuncs.com/acs/ubuntu:22.04-update
ARG CILIUM_LLVM_IMAGE=quay.io/cilium/cilium-llvm:547db7ec9a750b8f888a506709adb41f135b952e@sha256:4d6fa0aede3556c5fb5a9c71bc6b9585475ac9b1064f516d4c45c8fb691c9d9e
ARG CILIUM_BPFTOOL_IMAGE=quay.io/cilium/cilium-bpftool:78448c1a37ff2b790d5e25c3d8b8ec3e96e6405f@sha256:99a9453a921a8de99899ef82e0822f0c03f65d97005c064e231c06247ad8597d
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: ArthurChiao <arthurchiao@hotmail.com>
Date: Thu, 1 Dec 2022 12:33:39 +0800
Subject: pkg/bandwidth: enlarge sysctl settings only if they are below
baseline

On bandwidth manager enabled, several sysctl parameters of the node
will be overwritten by Cilium's baseline ones on agent start.

Problem arises when baseline values are smaller than the current system
settings. Such as, a node has a deliberately tuned setting
netdev_max_backlog=8000, but bandwidth manager will overwrite it with
its beseline value 1000, which may lead to packet drop issues in high
throughput cases that the node is targeted for.

This patch fixes that problem by inspecting the current settings, and
only overwrite them when they are below our baseline.

Fixes: #20878

Signed-off-by: ArthurChiao <arthurchiao@hotmail.com>
Signed-off-by: l1b0k <libokang.dev@gmail.com>
---
pkg/bandwidth/bandwidth.go | 107 ++++++++++++++++++++++++++++---------
pkg/sysctl/sysctl.go | 21 ++++++++
2 files changed, 102 insertions(+), 26 deletions(-)

diff --git a/pkg/bandwidth/bandwidth.go b/pkg/bandwidth/bandwidth.go
index b01c94930e..06974a2868 100644
--- a/pkg/bandwidth/bandwidth.go
+++ b/pkg/bandwidth/bandwidth.go
@@ -4,6 +4,8 @@
package bandwidth

import (
+ "fmt"
+
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"k8s.io/apimachinery/pkg/api/resource"
@@ -82,6 +84,82 @@ func ProbeBandwidthManager() {
}
}

+func setBaselineSysctls() error {
+ // Ensure interger type sysctls are no smaller than our baseline settings
+ baseIntSettings := map[string]int64{
+ "net.core.netdev_max_backlog": 1000,
+ "net.core.somaxconn": 4096,
+ "net.ipv4.tcp_max_syn_backlog": 4096,
+ }
+
+ for name, value := range baseIntSettings {
+ currentValue, err := sysctl.ReadInt(name)
+ if err != nil {
+ return fmt.Errorf("read sysctl %s failed: %s", name, err)
+ }
+
+ scopedLog := log.WithFields(logrus.Fields{
+ logfields.SysParamName: name,
+ logfields.SysParamValue: currentValue,
+ "baselineValue": value,
+ })
+
+ if currentValue >= value {
+ scopedLog.Info("Skip setting sysctl as it already meets baseline")
+ continue
+ }
+
+ scopedLog.Info("Setting sysctl to baseline for BPF bandwidth manager")
+ if err := sysctl.WriteInt(name, value); err != nil {
+ return fmt.Errorf("set sysctl %s=%d failed: %s", name, value, err)
+ }
+ }
+
+ // Ensure string type sysctls
+ congctl := "cubic"
+ if option.Config.EnableBBR {
+ congctl = "bbr"
+ }
+
+ baseStringSettings := map[string]string{
+ "net.core.default_qdisc": "fq",
+ "net.ipv4.tcp_congestion_control": congctl,
+ }
+
+ for name, value := range baseStringSettings {
+ log.WithFields(logrus.Fields{
+ logfields.SysParamName: name,
+ "baselineValue": value,
+ }).Info("Setting sysctl to baseline for BPF bandwidth manager")
+
+ if err := sysctl.Write(name, value); err != nil {
+ return fmt.Errorf("set sysctl %s=%s failed: %s", name, value, err)
+ }
+ }
+
+ // Extra settings
+ extraSettings := map[string]int64{
+ "net.ipv4.tcp_slow_start_after_idle": 0,
+ }
+
+ // Few extra knobs which can be turned on along with pacing. EnableBBR
+ // also provides the right kernel dependency implicitly as well.
+ if option.Config.EnableBBR {
+ for name, value := range extraSettings {
+ log.WithFields(logrus.Fields{
+ logfields.SysParamName: name,
+ "baselineValue": value,
+ }).Info("Setting sysctl to baseline for BPF bandwidth manager")
+
+ if err := sysctl.WriteInt(name, value); err != nil {
+ return fmt.Errorf("set sysctl %s=%d failed: %s", name, value, err)
+ }
+ }
+ }
+
+ return nil
+}
+
func InitBandwidthManager() {
if option.Config.DryMode || !option.Config.EnableBandwidthManager {
return
@@ -103,32 +181,9 @@ func InitBandwidthManager() {
if _, err := bwmap.ThrottleMap.OpenOrCreate(); err != nil {
log.WithError(err).Fatal("Failed to access ThrottleMap")
}
- congctl := "cubic"
- if option.Config.EnableBBR {
- congctl = "bbr"
- }
- type setting struct {
- name string
- val string
- }
- baseSettings := []setting{
- {"net.core.netdev_max_backlog", "1000"},
- {"net.core.somaxconn", "4096"},
- {"net.core.default_qdisc", "fq"},
- {"net.ipv4.tcp_max_syn_backlog", "4096"},
- {"net.ipv4.tcp_congestion_control", congctl},
- }
- for _, s := range baseSettings {
- log.WithFields(logrus.Fields{
- logfields.SysParamName: s.name,
- logfields.SysParamValue: s.val,
- }).Info("Setting sysctl")
- if err := sysctl.Write(s.name, s.val); err != nil {
- log.WithError(err).WithFields(logrus.Fields{
- logfields.SysParamName: s.name,
- logfields.SysParamValue: s.val,
- }).Fatal("Failed to set sysctl needed by BPF bandwidth manager.")
- }
+
+ if err := setBaselineSysctls(); err != nil {
+ log.WithError(err).Fatal("Failed to set sysctl needed by BPF bandwidth manager.")
}
return
for _, device := range option.Config.GetDevices() {
diff --git a/pkg/sysctl/sysctl.go b/pkg/sysctl/sysctl.go
index 77d072b145..ea5bee3b03 100644
--- a/pkg/sysctl/sysctl.go
+++ b/pkg/sysctl/sysctl.go
@@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"regexp"
+ "strconv"
"strings"

"github.com/sirupsen/logrus"
@@ -120,6 +121,11 @@ func Write(name string, val string) error {
return writeSysctl(name, val)
}

+// WriteInt writes the given integer type sysctl parameter.
+func WriteInt(name string, val int64) error {
+ return writeSysctl(name, strconv.FormatInt(val, 10))
+}
+
// Read reads the given sysctl parameter.
func Read(name string) (string, error) {
path, err := parameterPath(name)
@@ -134,6 +140,21 @@ func Read(name string) (string, error) {
return strings.TrimRight(string(val), "\n"), nil
}

+// ReadInt reads the given sysctl parameter, return an int64 value.
+func ReadInt(name string) (int64, error) {
+ s, err := Read(name)
+ if err != nil {
+ return -1, err
+ }
+
+ i, err := strconv.ParseInt(s, 10, 64)
+ if err != nil {
+ return -1, err
+ }
+
+ return i, nil
+}
+
// ApplySettings applies all settings in sysSettings.
func ApplySettings(sysSettings []Setting) error {
for _, s := range sysSettings {
--
2.44.0

0 comments on commit 47301e6

Please sign in to comment.