diff --git a/internal/settings/settings.go b/internal/settings/settings.go index 5c00a69529..67953b4fae 100644 --- a/internal/settings/settings.go +++ b/internal/settings/settings.go @@ -21,6 +21,7 @@ import ( "path/filepath" "regexp" "runtime" + "runtime/debug" "sort" "strconv" "strings" @@ -36,13 +37,13 @@ import ( const ( APIURLEnvVar = "SPLUNK_API_URL" - BallastEnvVar = "SPLUNK_BALLAST_SIZE_MIB" ConfigEnvVar = "SPLUNK_CONFIG" ConfigDirEnvVar = "SPLUNK_CONFIG_DIR" ConfigServerEnabledEnvVar = "SPLUNK_DEBUG_CONFIG_SERVER" ConfigYamlEnvVar = "SPLUNK_CONFIG_YAML" HecLogIngestURLEnvVar = "SPLUNK_HEC_URL" ListenInterfaceEnvVar = "SPLUNK_LISTEN_INTERFACE" + GoMemLimitEnvVar = "GOMEMLIMIT" // nolint:gosec HecTokenEnvVar = "SPLUNK_HEC_TOKEN" // this isn't a hardcoded token IngestURLEnvVar = "SPLUNK_INGEST_URL" @@ -368,15 +369,13 @@ func checkRuntimeParams(settings *Settings) error { } } - ballastSize := setMemoryBallast(memTotalSize) - memLimit, err := setMemoryLimit(memTotalSize) + _, err := setMemoryLimit(memTotalSize) if err != nil { return err } - // Validate memoryLimit and memoryBallast are sane - if 2*ballastSize > memLimit { - return fmt.Errorf("memory limit (%d) is less than 2x ballast (%d). Increase memory limit or decrease ballast size", memLimit, ballastSize) + if _, ok := os.LookupEnv(GoMemLimitEnvVar); !ok { + setSoftMemoryLimit(memTotalSize) } return nil @@ -517,20 +516,12 @@ func envVarAsInt(env string) int { return val } -// Validate and set the memory ballast -func setMemoryBallast(memTotalSizeMiB int) int { - ballastSize := memTotalSizeMiB * DefaultMemoryBallastPercentage / 100 - // Check if the memory ballast is specified via the env var, if so, validate and set properly. - if os.Getenv(BallastEnvVar) != "" { - ballastSize = envVarAsInt(BallastEnvVar) - if 33 > ballastSize { - log.Fatalf("Expected a number greater than 33 for %s env variable but got %d", BallastEnvVar, ballastSize) - } - } - - _ = os.Setenv(BallastEnvVar, strconv.Itoa(ballastSize)) - log.Printf("Set ballast to %d MiB", ballastSize) - return ballastSize +// Check if the GOMEMLIMIT is specified via the env var, if not set the soft memory limit to 90% of the MemLimitMiBEnvVar +func setSoftMemoryLimit(memTotalSizeMiB int) { + memLimit := int64(memTotalSizeMiB * DefaultMemoryLimitPercentage / 100) + // 1 MiB = 1048576 bytes + debug.SetMemoryLimit(memLimit * 1048576) + log.Printf("Set soft memory limit set to %d MiB", memLimit) } // Validate and set the memory limit diff --git a/internal/settings/settings_test.go b/internal/settings/settings_test.go index 44508e63b5..9aac76dd7f 100644 --- a/internal/settings/settings_test.go +++ b/internal/settings/settings_test.go @@ -20,6 +20,7 @@ import ( "log" "os" "path/filepath" + "runtime/debug" "strings" "testing" @@ -219,7 +220,6 @@ func TestCheckRuntimeParams_Default(t *testing.T) { settings, err := New([]string{}) require.NoError(t, err) require.NotNil(t, settings) - require.Equal(t, "168", os.Getenv(BallastEnvVar)) require.Equal(t, "460", os.Getenv(MemLimitMiBEnvVar)) require.Equal(t, "0.0.0.0", os.Getenv(ListenInterfaceEnvVar)) } @@ -231,7 +231,6 @@ func TestCheckRuntimeParams_MemTotalEnv(t *testing.T) { settings, err := New([]string{}) require.NoError(t, err) require.NotNil(t, settings) - require.Equal(t, "330", os.Getenv(BallastEnvVar)) require.Equal(t, "900", os.Getenv(MemLimitMiBEnvVar)) } @@ -244,32 +243,6 @@ func TestCheckRuntimeParams_ListenInterface(t *testing.T) { require.Equal(t, "1.2.3.4", os.Getenv(ListenInterfaceEnvVar)) } -func TestCheckRuntimeParams_MemTotalAndBallastEnvs(t *testing.T) { - t.Cleanup(setRequiredEnvVars(t)) - require.NoError(t, os.Setenv(ConfigEnvVar, localGatewayConfig)) - require.NoError(t, os.Setenv(MemTotalEnvVar, "200")) - require.NoError(t, os.Setenv(BallastEnvVar, "90")) - - settings, err := New([]string{}) - require.NoError(t, err) - require.NotNil(t, settings) - require.Equal(t, "90", os.Getenv(BallastEnvVar)) - require.Equal(t, "180", os.Getenv(MemLimitMiBEnvVar)) -} - -func TestCheckRuntimeParams_LimitAndBallastEnvs(t *testing.T) { - t.Cleanup(setRequiredEnvVars(t)) - require.NoError(t, os.Setenv(ConfigEnvVar, localGatewayConfig)) - require.NoError(t, os.Setenv(MemLimitMiBEnvVar, "250")) - require.NoError(t, os.Setenv(BallastEnvVar, "120")) - - settings, err := New([]string{}) - require.NoError(t, err) - require.NotNil(t, settings) - require.Equal(t, "120", os.Getenv(BallastEnvVar)) - require.Equal(t, "250", os.Getenv(MemLimitMiBEnvVar)) -} - func TestSetDefaultEnvVarsOnlySetsURLsWithRealmSet(t *testing.T) { t.Cleanup(clearEnv(t)) envVars := []string{"SPLUNK_API_URL", "SPLUNK_INGEST_URL", "SPLUNK_TRACE_URL", "SPLUNK_HEC_URL", "SPLUNK_HEC_TOKEN"} @@ -381,17 +354,22 @@ func TestSetDefaultFeatureGatesRespectsOverrides(t *testing.T) { } } -func TestCheckRuntimeParams_MemTotalLimitAndBallastEnvs(t *testing.T) { +func TestSetSoftMemLimitWithoutGoMemLimitEnvVar(t *testing.T) { + + // if GOLIMIT is not set, we expect soft limit to be 90% of the total memory env var or 90% of default total memory 512 Mib. t.Cleanup(setRequiredEnvVars(t)) require.NoError(t, os.Setenv(MemTotalEnvVar, "200")) - require.NoError(t, os.Setenv(MemLimitMiBEnvVar, "150")) - require.NoError(t, os.Setenv(BallastEnvVar, "50")) - settings, err := New([]string{}) require.NoError(t, err) require.NotNil(t, settings) - require.Equal(t, "50", os.Getenv(BallastEnvVar)) - require.Equal(t, "150", os.Getenv(MemLimitMiBEnvVar)) + require.Equal(t, int64(188743680), debug.SetMemoryLimit(100)) + + t.Cleanup(setRequiredEnvVars(t)) + settings, err = New([]string{}) + require.NoError(t, err) + require.NotNil(t, settings) + require.Equal(t, int64(482344960), debug.SetMemoryLimit(100)) + } func TestUseConfigPathsFromEnvVar(t *testing.T) { @@ -604,6 +582,28 @@ func TestConfigArgUnsupportedURI(t *testing.T) { require.Contains(t, logs.String(), `"invalid" is an unsupported config provider scheme for this Collector distribution (not in [env file]).`) } +func TestCheckRuntimeParams_MemTotal(t *testing.T) { + t.Cleanup(setRequiredEnvVars(t)) + require.NoError(t, os.Setenv(ConfigEnvVar, localGatewayConfig)) + require.NoError(t, os.Setenv(MemTotalEnvVar, "200")) + + settings, err := New([]string{}) + require.NoError(t, err) + require.NotNil(t, settings) + require.Equal(t, "180", os.Getenv(MemLimitMiBEnvVar)) +} + +func TestCheckRuntimeParams_Limit(t *testing.T) { + t.Cleanup(setRequiredEnvVars(t)) + require.NoError(t, os.Setenv(ConfigEnvVar, localGatewayConfig)) + require.NoError(t, os.Setenv(MemLimitMiBEnvVar, "250")) + + settings, err := New([]string{}) + require.NoError(t, err) + require.NotNil(t, settings) + require.Equal(t, "250", os.Getenv(MemLimitMiBEnvVar)) +} + func TestDefaultDiscoveryConfigDir(t *testing.T) { t.Cleanup(setRequiredEnvVars(t)) settings, err := New([]string{"--discovery"}) diff --git a/tests/general/default_config_test.go b/tests/general/default_config_test.go index 94436b2423..4698687e03 100644 --- a/tests/general/default_config_test.go +++ b/tests/general/default_config_test.go @@ -103,7 +103,7 @@ func TestDefaultGatewayConfig(t *testing.T) { }, }, "memory_ballast": map[string]any{ - "size_mib": "168", + "size_mib": "", }, "zpages": map[string]any{ "endpoint": fmt.Sprintf("%s:55679", ip), @@ -288,7 +288,7 @@ func TestDefaultAgentConfig(t *testing.T) { "endpoint": fmt.Sprintf("%s:6060", ip), }, }, - "memory_ballast": map[string]any{"size_mib": "168"}, + "memory_ballast": map[string]any{"size_mib": ""}, "smartagent": map[string]any{ "bundleDir": "/usr/lib/splunk-otel-collector/agent-bundle", "collectd": map[string]any{ diff --git a/tests/helm/helm_test.go b/tests/helm/helm_test.go index f6f6a37ab3..a4dba982e7 100644 --- a/tests/helm/helm_test.go +++ b/tests/helm/helm_test.go @@ -137,8 +137,8 @@ gateway: replicaCount: 1 resources: limits: - cpu: 200m - memory: 128Mi + cpu: 1024m + memory: 512Mi `, image, tag) release, err := helm.Install( @@ -151,7 +151,7 @@ gateway: require.NoError(t, err) for _, pod := range []string{"agent-.*", "k8s-cluster-receiver-.*", ".{16}"} { - cluster.WaitForPods(fmt.Sprintf("%s-%s", release.Name, pod), "monitoring", 2*time.Minute) + cluster.WaitForPods(fmt.Sprintf("%s-%s", release.Name, pod), "monitoring", 5*time.Minute) } tc.OTLPReceiverSink.AssertAllMetricsReceived(tc, *tc.ResourceMetrics("all.yaml"), 30*time.Second)