-
Notifications
You must be signed in to change notification settings - Fork 153
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #631 from l1b0k/fix/sysconf
fix: sysconf may overrived
- Loading branch information
Showing
2 changed files
with
209 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
208 changes: 208 additions & 0 deletions
208
policy/cilium/0027-pkg-bandwidth-enlarge-sysctl-settings-only-if-they-a.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||
From: ArthurChiao <arthurchiao@hotmail.com> | ||
Date: Thu, 1 Dec 2022 12:33:39 +0800 | ||
Subject: pkg/bandwidth: enlarge sysctl settings only if they are below | ||
baseline | ||
|
||
On bandwidth manager enabled, several sysctl parameters of the node | ||
will be overwritten by Cilium's baseline ones on agent start. | ||
|
||
Problem arises when baseline values are smaller than the current system | ||
settings. Such as, a node has a deliberately tuned setting | ||
netdev_max_backlog=8000, but bandwidth manager will overwrite it with | ||
its beseline value 1000, which may lead to packet drop issues in high | ||
throughput cases that the node is targeted for. | ||
|
||
This patch fixes that problem by inspecting the current settings, and | ||
only overwrite them when they are below our baseline. | ||
|
||
Fixes: #20878 | ||
|
||
Signed-off-by: ArthurChiao <arthurchiao@hotmail.com> | ||
Signed-off-by: l1b0k <libokang.dev@gmail.com> | ||
--- | ||
pkg/bandwidth/bandwidth.go | 107 ++++++++++++++++++++++++++++--------- | ||
pkg/sysctl/sysctl.go | 21 ++++++++ | ||
2 files changed, 102 insertions(+), 26 deletions(-) | ||
|
||
diff --git a/pkg/bandwidth/bandwidth.go b/pkg/bandwidth/bandwidth.go | ||
index b01c94930e..06974a2868 100644 | ||
--- a/pkg/bandwidth/bandwidth.go | ||
+++ b/pkg/bandwidth/bandwidth.go | ||
@@ -4,6 +4,8 @@ | ||
package bandwidth | ||
|
||
import ( | ||
+ "fmt" | ||
+ | ||
"github.com/sirupsen/logrus" | ||
"github.com/vishvananda/netlink" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
@@ -82,6 +84,82 @@ func ProbeBandwidthManager() { | ||
} | ||
} | ||
|
||
+func setBaselineSysctls() error { | ||
+ // Ensure interger type sysctls are no smaller than our baseline settings | ||
+ baseIntSettings := map[string]int64{ | ||
+ "net.core.netdev_max_backlog": 1000, | ||
+ "net.core.somaxconn": 4096, | ||
+ "net.ipv4.tcp_max_syn_backlog": 4096, | ||
+ } | ||
+ | ||
+ for name, value := range baseIntSettings { | ||
+ currentValue, err := sysctl.ReadInt(name) | ||
+ if err != nil { | ||
+ return fmt.Errorf("read sysctl %s failed: %s", name, err) | ||
+ } | ||
+ | ||
+ scopedLog := log.WithFields(logrus.Fields{ | ||
+ logfields.SysParamName: name, | ||
+ logfields.SysParamValue: currentValue, | ||
+ "baselineValue": value, | ||
+ }) | ||
+ | ||
+ if currentValue >= value { | ||
+ scopedLog.Info("Skip setting sysctl as it already meets baseline") | ||
+ continue | ||
+ } | ||
+ | ||
+ scopedLog.Info("Setting sysctl to baseline for BPF bandwidth manager") | ||
+ if err := sysctl.WriteInt(name, value); err != nil { | ||
+ return fmt.Errorf("set sysctl %s=%d failed: %s", name, value, err) | ||
+ } | ||
+ } | ||
+ | ||
+ // Ensure string type sysctls | ||
+ congctl := "cubic" | ||
+ if option.Config.EnableBBR { | ||
+ congctl = "bbr" | ||
+ } | ||
+ | ||
+ baseStringSettings := map[string]string{ | ||
+ "net.core.default_qdisc": "fq", | ||
+ "net.ipv4.tcp_congestion_control": congctl, | ||
+ } | ||
+ | ||
+ for name, value := range baseStringSettings { | ||
+ log.WithFields(logrus.Fields{ | ||
+ logfields.SysParamName: name, | ||
+ "baselineValue": value, | ||
+ }).Info("Setting sysctl to baseline for BPF bandwidth manager") | ||
+ | ||
+ if err := sysctl.Write(name, value); err != nil { | ||
+ return fmt.Errorf("set sysctl %s=%s failed: %s", name, value, err) | ||
+ } | ||
+ } | ||
+ | ||
+ // Extra settings | ||
+ extraSettings := map[string]int64{ | ||
+ "net.ipv4.tcp_slow_start_after_idle": 0, | ||
+ } | ||
+ | ||
+ // Few extra knobs which can be turned on along with pacing. EnableBBR | ||
+ // also provides the right kernel dependency implicitly as well. | ||
+ if option.Config.EnableBBR { | ||
+ for name, value := range extraSettings { | ||
+ log.WithFields(logrus.Fields{ | ||
+ logfields.SysParamName: name, | ||
+ "baselineValue": value, | ||
+ }).Info("Setting sysctl to baseline for BPF bandwidth manager") | ||
+ | ||
+ if err := sysctl.WriteInt(name, value); err != nil { | ||
+ return fmt.Errorf("set sysctl %s=%d failed: %s", name, value, err) | ||
+ } | ||
+ } | ||
+ } | ||
+ | ||
+ return nil | ||
+} | ||
+ | ||
func InitBandwidthManager() { | ||
if option.Config.DryMode || !option.Config.EnableBandwidthManager { | ||
return | ||
@@ -103,32 +181,9 @@ func InitBandwidthManager() { | ||
if _, err := bwmap.ThrottleMap.OpenOrCreate(); err != nil { | ||
log.WithError(err).Fatal("Failed to access ThrottleMap") | ||
} | ||
- congctl := "cubic" | ||
- if option.Config.EnableBBR { | ||
- congctl = "bbr" | ||
- } | ||
- type setting struct { | ||
- name string | ||
- val string | ||
- } | ||
- baseSettings := []setting{ | ||
- {"net.core.netdev_max_backlog", "1000"}, | ||
- {"net.core.somaxconn", "4096"}, | ||
- {"net.core.default_qdisc", "fq"}, | ||
- {"net.ipv4.tcp_max_syn_backlog", "4096"}, | ||
- {"net.ipv4.tcp_congestion_control", congctl}, | ||
- } | ||
- for _, s := range baseSettings { | ||
- log.WithFields(logrus.Fields{ | ||
- logfields.SysParamName: s.name, | ||
- logfields.SysParamValue: s.val, | ||
- }).Info("Setting sysctl") | ||
- if err := sysctl.Write(s.name, s.val); err != nil { | ||
- log.WithError(err).WithFields(logrus.Fields{ | ||
- logfields.SysParamName: s.name, | ||
- logfields.SysParamValue: s.val, | ||
- }).Fatal("Failed to set sysctl needed by BPF bandwidth manager.") | ||
- } | ||
+ | ||
+ if err := setBaselineSysctls(); err != nil { | ||
+ log.WithError(err).Fatal("Failed to set sysctl needed by BPF bandwidth manager.") | ||
} | ||
return | ||
for _, device := range option.Config.GetDevices() { | ||
diff --git a/pkg/sysctl/sysctl.go b/pkg/sysctl/sysctl.go | ||
index 77d072b145..ea5bee3b03 100644 | ||
--- a/pkg/sysctl/sysctl.go | ||
+++ b/pkg/sysctl/sysctl.go | ||
@@ -10,6 +10,7 @@ import ( | ||
"os" | ||
"path/filepath" | ||
"regexp" | ||
+ "strconv" | ||
"strings" | ||
|
||
"github.com/sirupsen/logrus" | ||
@@ -120,6 +121,11 @@ func Write(name string, val string) error { | ||
return writeSysctl(name, val) | ||
} | ||
|
||
+// WriteInt writes the given integer type sysctl parameter. | ||
+func WriteInt(name string, val int64) error { | ||
+ return writeSysctl(name, strconv.FormatInt(val, 10)) | ||
+} | ||
+ | ||
// Read reads the given sysctl parameter. | ||
func Read(name string) (string, error) { | ||
path, err := parameterPath(name) | ||
@@ -134,6 +140,21 @@ func Read(name string) (string, error) { | ||
return strings.TrimRight(string(val), "\n"), nil | ||
} | ||
|
||
+// ReadInt reads the given sysctl parameter, return an int64 value. | ||
+func ReadInt(name string) (int64, error) { | ||
+ s, err := Read(name) | ||
+ if err != nil { | ||
+ return -1, err | ||
+ } | ||
+ | ||
+ i, err := strconv.ParseInt(s, 10, 64) | ||
+ if err != nil { | ||
+ return -1, err | ||
+ } | ||
+ | ||
+ return i, nil | ||
+} | ||
+ | ||
// ApplySettings applies all settings in sysSettings. | ||
func ApplySettings(sysSettings []Setting) error { | ||
for _, s := range sysSettings { | ||
-- | ||
2.44.0 | ||
|