From 6c21464fad27f1f26fd4e9c5d90056371fc9cdd9 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 16 Jan 2019 15:26:51 +0100 Subject: [PATCH 01/14] Support for maximum size for Output of checks This PR will allow administrator to limit the size of Output produced by healthchecks, when set at agent level, it will be the maximum output for all healthchecks produced by agents. It will also allow specific healthchecks to go below the limit set by administrator, OutputMaxSize if set in check specification will now allow to go beyond agent level setting, but will allow to reduce it further. We had the following configuration at agent level * check_output_max_size (default: 4k) : max number for all checks of the agent * check.OutputMaxSize: (default: 4k) --- agent/agent.go | 28 ++++++++---- agent/checks/check.go | 23 ++++++---- agent/checks/check_test.go | 72 +++++++++++++++++-------------- agent/config/builder.go | 13 +++++- agent/config/config.go | 2 + agent/config/default.go | 1 + agent/config/flags.go | 1 + agent/config/runtime.go | 5 +++ agent/config/runtime_test.go | 59 ++++++++++++++++++++----- agent/consul/config.go | 5 +++ agent/structs/check_definition.go | 2 + agent/structs/check_type.go | 4 ++ agent/structs/structs.go | 3 ++ 13 files changed, 155 insertions(+), 63 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index c1455a78d81b..c96f3767c358 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -915,6 +915,7 @@ func (a *Agent) consulConfig() (*consul.Config, error) { base.CoordinateUpdateBatchSize = a.config.ConsulCoordinateUpdateBatchSize base.CoordinateUpdateMaxBatches = a.config.ConsulCoordinateUpdateMaxBatches base.CoordinateUpdatePeriod = a.config.ConsulCoordinateUpdatePeriod + base.CheckOutputMaxSize = a.config.CheckOutputMaxSize base.RaftConfig.HeartbeatTimeout = a.config.ConsulRaftHeartbeatTimeout base.RaftConfig.LeaderLeaseTimeout = a.config.ConsulRaftLeaderLeaseTimeout @@ -971,6 +972,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.CheckOutputMaxSize > 0 { + base.CheckOutputMaxSize = a.config.CheckOutputMaxSize + } if a.config.RejoinAfterLeave { base.RejoinAfterLeave = true } @@ -2239,6 +2243,13 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, // Check if already registered if chkType != nil { + maxOutputSize := a.config.CheckOutputMaxSize + if maxOutputSize == 0 { + maxOutputSize = 4096 + } + if chkType.OutputMaxSize > 0 && maxOutputSize > chkType.OutputMaxSize { + maxOutputSize = chkType.OutputMaxSize + } switch { case chkType.IsTTL(): @@ -2285,6 +2296,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, Interval: chkType.Interval, Timeout: chkType.Timeout, Logger: a.logger, + OutputMaxSize: maxOutputSize, TLSClientConfig: tlsClientConfig, } http.Start() @@ -2352,7 +2364,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, } if a.dockerClient == nil { - dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), checks.BufSize) + dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), int64(maxOutputSize)) if err != nil { a.logger.Printf("[ERR] agent: error creating docker client: %s", err) return err @@ -2387,14 +2399,14 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, check.CheckID, checks.MinInterval) chkType.Interval = checks.MinInterval } - monitor := &checks.CheckMonitor{ - Notify: a.State, - CheckID: check.CheckID, - ScriptArgs: chkType.ScriptArgs, - Interval: chkType.Interval, - Timeout: chkType.Timeout, - Logger: a.logger, + Notify: a.State, + CheckID: check.CheckID, + ScriptArgs: chkType.ScriptArgs, + Interval: chkType.Interval, + Timeout: chkType.Timeout, + Logger: a.logger, + OutputMaxSize: maxOutputSize, } monitor.Start() a.checkMonitors[check.CheckID] = monitor diff --git a/agent/checks/check.go b/agent/checks/check.go index b09bdc8be16c..6abca5876a21 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -56,13 +56,14 @@ type CheckNotifier interface { // determine the health of a given check. It is compatible with // nagios plugins and expects the output in the same format. type CheckMonitor struct { - Notify CheckNotifier - CheckID types.CheckID - Script string - ScriptArgs []string - Interval time.Duration - Timeout time.Duration - Logger *log.Logger + Notify CheckNotifier + CheckID types.CheckID + Script string + ScriptArgs []string + Interval time.Duration + Timeout time.Duration + Logger *log.Logger + OutputMaxSize int stop bool stopCh chan struct{} @@ -122,7 +123,7 @@ func (c *CheckMonitor) check() { } // Collect the output - output, _ := circbuf.NewBuffer(BufSize) + output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize)) cmd.Stdout = output cmd.Stderr = output exec.SetSysProcAttr(cmd) @@ -303,6 +304,7 @@ type CheckHTTP struct { Timeout time.Duration Logger *log.Logger TLSClientConfig *tls.Config + OutputMaxSize int httpClient *http.Client stop bool @@ -339,6 +341,9 @@ func (c *CheckHTTP) Start() { } else if c.Interval < 10*time.Second { c.httpClient.Timeout = c.Interval } + if c.OutputMaxSize < 1 { + c.OutputMaxSize = 4096 + } } c.stop = false @@ -413,7 +418,7 @@ func (c *CheckHTTP) check() { defer resp.Body.Close() // Read the response into a circular buffer to limit the size - output, _ := circbuf.NewBuffer(BufSize) + output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize)) if _, err := io.Copy(output, resp.Body); err != nil { c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err) } diff --git a/agent/checks/check_test.go b/agent/checks/check_test.go index 014d58302b27..02ed9f06b454 100644 --- a/agent/checks/check_test.go +++ b/agent/checks/check_test.go @@ -44,11 +44,12 @@ func TestCheckMonitor_Script(t *testing.T) { t.Run(tt.status, func(t *testing.T) { notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - Script: tt.script, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + Script: tt.script, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: 4096, } check.Start() defer check.Stop() @@ -79,11 +80,12 @@ func TestCheckMonitor_Args(t *testing.T) { t.Run(tt.status, func(t *testing.T) { notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: tt.args, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: tt.args, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: 4096, } check.Start() defer check.Stop() @@ -103,12 +105,13 @@ func TestCheckMonitor_Timeout(t *testing.T) { // t.Parallel() // timing test. no parallel notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"}, - Interval: 50 * time.Millisecond, - Timeout: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"}, + Interval: 50 * time.Millisecond, + Timeout: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: 4096, } check.Start() defer check.Stop() @@ -128,11 +131,12 @@ func TestCheckMonitor_RandomStagger(t *testing.T) { // t.Parallel() // timing test. no parallel notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"sh", "-c", "exit 0"}, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"sh", "-c", "exit 0"}, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: 4096, } check.Start() defer check.Stop() @@ -153,11 +157,12 @@ func TestCheckMonitor_LimitOutput(t *testing.T) { t.Parallel() notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"}, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"}, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: 4096, } check.Start() defer check.Stop() @@ -295,13 +300,14 @@ func TestCheckHTTP(t *testing.T) { notif := mock.NewNotify() check := &CheckHTTP{ - Notify: notif, - CheckID: types.CheckID("foo"), - HTTP: server.URL, - Method: tt.method, - Header: tt.header, - Interval: 10 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + HTTP: server.URL, + Method: tt.method, + OutputMaxSize: 4096, + Header: tt.header, + Interval: 10 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), } check.Start() defer check.Stop() diff --git a/agent/config/builder.go b/agent/config/builder.go index dafc25a9f521..3c8523f2d490 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -786,6 +786,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { CAPath: b.stringVal(c.CAPath), CertFile: b.stringVal(c.CertFile), CheckUpdateInterval: b.durationVal("check_update_interval", c.CheckUpdateInterval), + CheckOutputMaxSize: b.intValWithDefault(c.CheckOutputMaxSize, 4096), Checks: checks, ClientAddrs: clientAddrs, ConfigEntryBootstrap: configEntries, @@ -964,6 +965,9 @@ func (b *Builder) Validate(rt RuntimeConfig) error { if rt.BootstrapExpect > 0 && rt.Bootstrap { return fmt.Errorf("'bootstrap_expect > 0' and 'bootstrap = true' are mutually exclusive") } + if rt.CheckOutputMaxSize < 1 { + return fmt.Errorf("'check_output_max_size must be positive") + } if rt.AEInterval <= 0 { return fmt.Errorf("ae_interval cannot be %s. Must be positive", rt.AEInterval) } @@ -1174,6 +1178,7 @@ func (b *Builder) checkVal(v *CheckDefinition) *structs.CheckDefinition { Timeout: b.durationVal(fmt.Sprintf("check[%s].timeout", id), v.Timeout), TTL: b.durationVal(fmt.Sprintf("check[%s].ttl", id), v.TTL), DeregisterCriticalServiceAfter: b.durationVal(fmt.Sprintf("check[%s].deregister_critical_service_after", id), v.DeregisterCriticalServiceAfter), + OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, 4096), } } @@ -1347,13 +1352,17 @@ func (b *Builder) durationVal(name string, v *string) (d time.Duration) { return b.durationValWithDefault(name, v, 0) } -func (b *Builder) intVal(v *int) int { +func (b *Builder) intValWithDefault(v *int, defaultVal int) int { if v == nil { - return 0 + return defaultVal } return *v } +func (b *Builder) intVal(v *int) int { + return b.intValWithDefault(v, 0) +} + func (b *Builder) portVal(name string, v *int) int { if v == nil || *v <= 0 { return -1 diff --git a/agent/config/config.go b/agent/config/config.go index fdafdb1636f1..d899f634c0ef 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -184,6 +184,7 @@ type Config struct { CAPath *string `json:"ca_path,omitempty" hcl:"ca_path" mapstructure:"ca_path"` CertFile *string `json:"cert_file,omitempty" hcl:"cert_file" mapstructure:"cert_file"` Check *CheckDefinition `json:"check,omitempty" hcl:"check" mapstructure:"check"` // needs to be a pointer to avoid partial merges + CheckOutputMaxSize *int `json:"check_output_max_size,omitempty" hcl:"check_output_max_size" mapstructure:"check_output_max_size"` CheckUpdateInterval *string `json:"check_update_interval,omitempty" hcl:"check_update_interval" mapstructure:"check_update_interval"` Checks []CheckDefinition `json:"checks,omitempty" hcl:"checks" mapstructure:"checks"` ClientAddr *string `json:"client_addr,omitempty" hcl:"client_addr" mapstructure:"client_addr"` @@ -390,6 +391,7 @@ type CheckDefinition struct { HTTP *string `json:"http,omitempty" hcl:"http" mapstructure:"http"` Header map[string][]string `json:"header,omitempty" hcl:"header" mapstructure:"header"` Method *string `json:"method,omitempty" hcl:"method" mapstructure:"method"` + OutputMaxSize *int `json:"output_max_size,omitempty" hcl:"output_max_size" mapstructure:"output_max_size"` TCP *string `json:"tcp,omitempty" hcl:"tcp" mapstructure:"tcp"` Interval *string `json:"interval,omitempty" hcl:"interval" mapstructure:"interval"` DockerContainerID *string `json:"docker_container_id,omitempty" hcl:"docker_container_id" mapstructure:"docker_container_id"` diff --git a/agent/config/default.go b/agent/config/default.go index 0d6fe3ff6955..10f6ec78e214 100644 --- a/agent/config/default.go +++ b/agent/config/default.go @@ -49,6 +49,7 @@ func DefaultSource() Source { bind_addr = "0.0.0.0" bootstrap = false bootstrap_expect = 0 + check_output_max_size = 4096 check_update_interval = "5m" client_addr = "127.0.0.1" datacenter = "` + consul.DefaultDC + `" diff --git a/agent/config/flags.go b/agent/config/flags.go index 7dc4f5a1f8f6..30bd61d58006 100644 --- a/agent/config/flags.go +++ b/agent/config/flags.go @@ -60,6 +60,7 @@ func AddFlags(fs *flag.FlagSet, f *Flags) { add(&f.Config.Bootstrap, "bootstrap", "Sets server to bootstrap mode.") add(&f.Config.BootstrapExpect, "bootstrap-expect", "Sets server to expect bootstrap mode.") add(&f.Config.ClientAddr, "client", "Sets the address to bind for client access. This includes RPC, DNS, HTTP, HTTPS and gRPC (if configured).") + add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Set the maximum default size of output for checks on this agent") add(&f.ConfigFiles, "config-dir", "Path to a directory to read configuration files from. This will read every file ending in '.json' as configuration in this directory in alphabetical order. Can be specified multiple times.") add(&f.ConfigFiles, "config-file", "Path to a file in JSON or HCL format with a matching file extension. Can be specified multiple times.") add(&f.ConfigFormat, "config-format", "Config files are in this format irrespective of their extension. Must be 'hcl' or 'json'") diff --git a/agent/config/runtime.go b/agent/config/runtime.go index dc9d567f81dc..5286392f506d 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -459,6 +459,11 @@ type RuntimeConfig struct { // hcl: check_update_interval = "duration" CheckUpdateInterval time.Duration + // Maximum size for the output of a healtcheck + // hcl check_output_max_size int + // flag: -check_output_max_size int + CheckOutputMaxSize int + // Checks contains the provided check definitions. // // hcl: checks = [ diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 70e6eb3d9637..58322836a2f6 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -2076,8 +2076,8 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}}, - &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}}, + &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}, OutputMaxSize: 4096}, + &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}, OutputMaxSize: 4096}, } rt.DataDir = dataDir }, @@ -2095,7 +2095,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true}, + &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true, OutputMaxSize: 4096}, } rt.DataDir = dataDir }, @@ -2113,7 +2113,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", AliasService: "foo"}, + &structs.CheckDefinition{Name: "a", AliasService: "foo", OutputMaxSize: 4096}, } rt.DataDir = dataDir }, @@ -2232,6 +2232,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { DockerContainerID: "z", DeregisterCriticalServiceAfter: 10 * time.Second, ScriptArgs: []string{"a", "b"}, + OutputMaxSize: 4096, }, }, Weights: &structs.Weights{ @@ -2497,8 +2498,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { Port: 2345, Checks: structs.CheckTypes{ { - TCP: "127.0.0.1:2345", - Interval: 10 * time.Second, + TCP: "127.0.0.1:2345", + Interval: 10 * time.Second, + OutputMaxSize: 4096, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -2592,8 +2594,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { Port: 2345, Checks: structs.CheckTypes{ { - TCP: "127.0.0.1:2345", - Interval: 10 * time.Second, + TCP: "127.0.0.1:2345", + Interval: 10 * time.Second, + OutputMaxSize: 4096, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -3025,6 +3028,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": [ "RyuIdDWv", "QbxEcIUM" ] }, "method": "Dou0nGT5", + "output_max_size": 4096, "tcp": "JY6fTTcw", "interval": "18714s", "docker_container_id": "qF66POS9", @@ -3051,6 +3055,7 @@ func TestFullConfig(t *testing.T) { "method": "aldrIQ4l", "tcp": "RJQND605", "interval": "22164s", + "output_max_size": 4096, "docker_container_id": "ipgdFtjd", "shell": "qAeOYy0M", "tls_skip_verify": true, @@ -3074,6 +3079,7 @@ func TestFullConfig(t *testing.T) { "method": "gLrztrNw", "tcp": "4jG5casb", "interval": "28767s", + "output_max_size": 4096, "docker_container_id": "THW6u7rL", "shell": "C1Zt3Zwh", "tls_skip_verify": true, @@ -3274,6 +3280,7 @@ func TestFullConfig(t *testing.T) { "method": "9afLm3Mj", "tcp": "fjiLFqVd", "interval": "23926s", + "output_max_size": 4096, "docker_container_id": "dO5TtRHk", "shell": "e6q2ttES", "tls_skip_verify": true, @@ -3296,6 +3303,7 @@ func TestFullConfig(t *testing.T) { "method": "T66MFBfR", "tcp": "bNnNfx2A", "interval": "22224s", + "output_max_size": 4096, "docker_container_id": "ipgdFtjd", "shell": "omVZq7Sz", "tls_skip_verify": true, @@ -3317,6 +3325,7 @@ func TestFullConfig(t *testing.T) { "method": "ciYHWors", "tcp": "FfvCwlqH", "interval": "12356s", + "output_max_size": 4096, "docker_container_id": "HBndBU6R", "shell": "hVI33JjA", "tls_skip_verify": true, @@ -3352,6 +3361,7 @@ func TestFullConfig(t *testing.T) { "method": "X5DrovFc", "tcp": "ICbxkpSF", "interval": "24392s", + "output_max_size": 4096, "docker_container_id": "ZKXr68Yb", "shell": "CEfzx0Fo", "tls_skip_verify": true, @@ -3390,6 +3400,7 @@ func TestFullConfig(t *testing.T) { "method": "5wkAxCUE", "tcp": "MN3oA9D2", "interval": "32718s", + "output_max_size": 4096, "docker_container_id": "cU15LMet", "shell": "nEz9qz2l", "tls_skip_verify": true, @@ -3411,6 +3422,7 @@ func TestFullConfig(t *testing.T) { "method": "wzByP903", "tcp": "2exjZIGE", "interval": "5656s", + "output_max_size": 4096, "docker_container_id": "5tDBWpfA", "shell": "rlTpLM8s", "tls_skip_verify": true, @@ -3593,6 +3605,7 @@ func TestFullConfig(t *testing.T) { method = "Dou0nGT5" tcp = "JY6fTTcw" interval = "18714s" + output_max_size = 4096 docker_container_id = "qF66POS9" shell = "sOnDy228" tls_skip_verify = true @@ -3617,6 +3630,7 @@ func TestFullConfig(t *testing.T) { method = "aldrIQ4l" tcp = "RJQND605" interval = "22164s" + output_max_size = 4096, docker_container_id = "ipgdFtjd" shell = "qAeOYy0M" tls_skip_verify = true @@ -3640,6 +3654,7 @@ func TestFullConfig(t *testing.T) { method = "gLrztrNw" tcp = "4jG5casb" interval = "28767s" + output_max_size = 4096, docker_container_id = "THW6u7rL" shell = "C1Zt3Zwh" tls_skip_verify = true @@ -3865,6 +3880,7 @@ func TestFullConfig(t *testing.T) { method = "T66MFBfR" tcp = "bNnNfx2A" interval = "22224s" + output_max_size = 4096 docker_container_id = "ipgdFtjd" shell = "omVZq7Sz" tls_skip_verify = true @@ -3886,6 +3902,7 @@ func TestFullConfig(t *testing.T) { method = "ciYHWors" tcp = "FfvCwlqH" interval = "12356s" + output_max_size = 4096 docker_container_id = "HBndBU6R" shell = "hVI33JjA" tls_skip_verify = true @@ -3921,6 +3938,7 @@ func TestFullConfig(t *testing.T) { method = "X5DrovFc" tcp = "ICbxkpSF" interval = "24392s" + output_max_size = 4096 docker_container_id = "ZKXr68Yb" shell = "CEfzx0Fo" tls_skip_verify = true @@ -3959,6 +3977,7 @@ func TestFullConfig(t *testing.T) { method = "5wkAxCUE" tcp = "MN3oA9D2" interval = "32718s" + output_max_size = 4096 docker_container_id = "cU15LMet" shell = "nEz9qz2l" tls_skip_verify = true @@ -3980,6 +3999,7 @@ func TestFullConfig(t *testing.T) { method = "wzByP903" tcp = "2exjZIGE" interval = "5656s" + output_max_size = 4096 docker_container_id = "5tDBWpfA" shell = "rlTpLM8s" tls_skip_verify = true @@ -4248,6 +4268,7 @@ func TestFullConfig(t *testing.T) { CAFile: "erA7T0PM", CAPath: "mQEN1Mfp", CertFile: "7s4QAzDk", + CheckOutputMaxSize: 4096, Checks: []*structs.CheckDefinition{ &structs.CheckDefinition{ ID: "uAjE6m9Z", @@ -4265,6 +4286,7 @@ func TestFullConfig(t *testing.T) { Method: "aldrIQ4l", TCP: "RJQND605", Interval: 22164 * time.Second, + OutputMaxSize: 4096, DockerContainerID: "ipgdFtjd", Shell: "qAeOYy0M", TLSSkipVerify: true, @@ -4286,6 +4308,7 @@ func TestFullConfig(t *testing.T) { "qxvdnSE9": []string{"6wBPUYdF", "YYh8wtSZ"}, }, Method: "gLrztrNw", + OutputMaxSize: 4096, TCP: "4jG5casb", Interval: 28767 * time.Second, DockerContainerID: "THW6u7rL", @@ -4309,6 +4332,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": {"RyuIdDWv", "QbxEcIUM"}, }, Method: "Dou0nGT5", + OutputMaxSize: 4096, TCP: "JY6fTTcw", Interval: 18714 * time.Second, DockerContainerID: "qF66POS9", @@ -4477,6 +4501,7 @@ func TestFullConfig(t *testing.T) { "cVFpko4u": {"gGqdEB6k", "9LsRo22u"}, }, Method: "X5DrovFc", + OutputMaxSize: 4096, TCP: "ICbxkpSF", Interval: 24392 * time.Second, DockerContainerID: "ZKXr68Yb", @@ -4525,6 +4550,7 @@ func TestFullConfig(t *testing.T) { "1UJXjVrT": {"OJgxzTfk", "xZZrFsq7"}, }, Method: "5wkAxCUE", + OutputMaxSize: 4096, TCP: "MN3oA9D2", Interval: 32718 * time.Second, DockerContainerID: "cU15LMet", @@ -4546,6 +4572,7 @@ func TestFullConfig(t *testing.T) { "vr7wY7CS": {"EtCoNPPL", "9vAarJ5s"}, }, Method: "wzByP903", + OutputMaxSize: 4096, TCP: "2exjZIGE", Interval: 5656 * time.Second, DockerContainerID: "5tDBWpfA", @@ -4631,6 +4658,7 @@ func TestFullConfig(t *testing.T) { "SHOVq1Vv": {"jntFhyym", "GYJh32pp"}, }, Method: "T66MFBfR", + OutputMaxSize: 4096, TCP: "bNnNfx2A", Interval: 22224 * time.Second, DockerContainerID: "ipgdFtjd", @@ -4652,6 +4680,7 @@ func TestFullConfig(t *testing.T) { "p2UI34Qz": {"UsG1D0Qh", "NHhRiB6s"}, }, Method: "ciYHWors", + OutputMaxSize: 4096, TCP: "FfvCwlqH", Interval: 12356 * time.Second, DockerContainerID: "HBndBU6R", @@ -4673,6 +4702,7 @@ func TestFullConfig(t *testing.T) { "l4HwQ112": {"fk56MNlo", "dhLK56aZ"}, }, Method: "9afLm3Mj", + OutputMaxSize: 4096, TCP: "fjiLFqVd", Interval: 23926 * time.Second, DockerContainerID: "dO5TtRHk", @@ -5010,6 +5040,7 @@ func TestConfigDecodeBytes(t *testing.T) { func TestSanitize(t *testing.T) { rt := RuntimeConfig{ BindAddr: &net.IPAddr{IP: net.ParseIP("127.0.0.1")}, + CheckOutputMaxSize: 4096, SerfAdvertiseAddrLAN: &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, DNSAddrs: []net.Addr{ &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, @@ -5032,7 +5063,8 @@ func TestSanitize(t *testing.T) { Name: "foo", Token: "bar", Check: structs.CheckType{ - Name: "blurb", + Name: "blurb", + OutputMaxSize: 4096, }, Weights: &structs.Weights{ Passing: 67, @@ -5042,8 +5074,9 @@ func TestSanitize(t *testing.T) { }, Checks: []*structs.CheckDefinition{ &structs.CheckDefinition{ - Name: "zoo", - Token: "zope", + Name: "zoo", + Token: "zope", + OutputMaxSize: 4096, }, }, } @@ -5083,6 +5116,7 @@ func TestSanitize(t *testing.T) { "CAPath": "", "CertFile": "", "CheckDeregisterIntervalMin": "0s", + "CheckOutputMaxSize": 4096, "CheckReapInterval": "0s", "CheckUpdateInterval": "0s", "Checks": [{ @@ -5099,6 +5133,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "zoo", "Notes": "", + "OutputMaxSize": 4096, "ScriptArgs": [], "ServiceID": "", "Shell": "", @@ -5130,6 +5165,7 @@ func TestSanitize(t *testing.T) { "ConsulCoordinateUpdateMaxBatches": 0, "ConsulCoordinateUpdatePeriod": "15s", "ConsulRaftElectionTimeout": "0s", + "CheckOutputMaxSize": 4096, "ConsulRaftHeartbeatTimeout": "0s", "ConsulRaftLeaderLeaseTimeout": "0s", "GossipLANGossipInterval": "0s", @@ -5269,6 +5305,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "blurb", "Notes": "", + "OutputMaxSize": 4096, "ScriptArgs": [], "Shell": "", "Status": "", diff --git a/agent/consul/config.go b/agent/consul/config.go index d7e92943bb48..5342df24c9a8 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -365,6 +365,9 @@ type Config struct { // warning and discard the remaining updates. CoordinateUpdateMaxBatches int + // CheckOutputMaxSize control the max size of output of checks + CheckOutputMaxSize int + // RPCHoldTimeout is how long an RPC can be "held" before it is errored. // This is used to paper over a loss of leadership by instead holding RPCs, // so that the caller experiences a slow response rather than an error. @@ -499,6 +502,8 @@ func DefaultConfig() *Config { CoordinateUpdateBatchSize: 128, CoordinateUpdateMaxBatches: 5, + CheckOutputMaxSize: 4096, + RPCRate: rate.Inf, RPCMaxBurst: 1000, diff --git a/agent/structs/check_definition.go b/agent/structs/check_definition.go index 4252b4449d80..aaadda7c7fcb 100644 --- a/agent/structs/check_definition.go +++ b/agent/structs/check_definition.go @@ -37,6 +37,7 @@ type CheckDefinition struct { Timeout time.Duration TTL time.Duration DeregisterCriticalServiceAfter time.Duration + OutputMaxSize int } func (c *CheckDefinition) HealthCheck(node string) *HealthCheck { @@ -72,6 +73,7 @@ func (c *CheckDefinition) CheckType() *CheckType { GRPCUseTLS: c.GRPCUseTLS, Header: c.Header, Method: c.Method, + OutputMaxSize: c.OutputMaxSize, TCP: c.TCP, Interval: c.Interval, DockerContainerID: c.DockerContainerID, diff --git a/agent/structs/check_type.go b/agent/structs/check_type.go index 43b76057c6af..9b1b055dae30 100644 --- a/agent/structs/check_type.go +++ b/agent/structs/check_type.go @@ -45,6 +45,7 @@ type CheckType struct { // service, if any, to be deregistered if this check is critical for // longer than this duration. DeregisterCriticalServiceAfter time.Duration + OutputMaxSize int } type CheckTypes []*CheckType @@ -67,6 +68,9 @@ func (c *CheckType) Validate() error { if !intervalCheck && !c.IsAlias() && c.TTL <= 0 { return fmt.Errorf("TTL must be > 0 for TTL checks") } + if c.OutputMaxSize < 0 { + return fmt.Errorf("MaxOutputMaxSize must be positive") + } return nil } diff --git a/agent/structs/structs.go b/agent/structs/structs.go index dd2cea214702..f20b8233c882 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -958,6 +958,7 @@ type HealthCheckDefinition struct { Method string `json:",omitempty"` TCP string `json:",omitempty"` Interval time.Duration `json:",omitempty"` + OutputMaxSize uint `json:",omitempty"` Timeout time.Duration `json:",omitempty"` DeregisterCriticalServiceAfter time.Duration `json:",omitempty"` } @@ -966,11 +967,13 @@ func (d *HealthCheckDefinition) MarshalJSON() ([]byte, error) { type Alias HealthCheckDefinition exported := &struct { Interval string `json:",omitempty"` + OutputMaxSize uint `json:",omitempty"` Timeout string `json:",omitempty"` DeregisterCriticalServiceAfter string `json:",omitempty"` *Alias }{ Interval: d.Interval.String(), + OutputMaxSize: d.OutputMaxSize, Timeout: d.Timeout.String(), DeregisterCriticalServiceAfter: d.DeregisterCriticalServiceAfter.String(), Alias: (*Alias)(d), From b5717aa0d4c9651a3b14ce882d85243b5d503b81 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Thu, 24 Jan 2019 10:44:41 +0100 Subject: [PATCH 02/14] Added unit test --- agent/checks/check_test.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/agent/checks/check_test.go b/agent/checks/check_test.go index 02ed9f06b454..8d9c825ac26a 100644 --- a/agent/checks/check_test.go +++ b/agent/checks/check_test.go @@ -328,6 +328,43 @@ func TestCheckHTTP(t *testing.T) { } } +func TestCheckMaxOutputSize(t *testing.T) { + t.Parallel() + timeout := 5 * time.Millisecond + server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, req *http.Request) { + body := bytes.Repeat([]byte{'x'}, 2*BufSize) + writer.WriteHeader(200) + writer.Write(body) + })) + defer server.Close() + + notif := mock.NewNotify() + maxOutputSize := 32 + check := &CheckHTTP{ + Notify: notif, + CheckID: types.CheckID("bar"), + HTTP: server.URL + "/v1/agent/self", + Timeout: timeout, + Interval: 2 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: maxOutputSize, + } + + check.Start() + defer check.Stop() + retry.Run(t, func(r *retry.R) { + if got, want := notif.Updates("bar"), 2; got < want { + r.Fatalf("got %d updates want at least %d", got, want) + } + if got, want := notif.State("bar"), api.HealthPassing; got != want { + r.Fatalf("got state %q want %q", got, want) + } + if got, want := notif.Output("bar"), "HTTP GET "+server.URL+"/v1/agent/self: 200 OK Output: "+strings.Repeat("x", maxOutputSize); got != want { + r.Fatalf("got state %q want %q", got, want) + } + }) +} + func TestCheckHTTPTimeout(t *testing.T) { t.Parallel() timeout := 5 * time.Millisecond From b5f00ffe65acf61343b7460a78ab36c825b4a569 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 22 May 2019 11:30:43 +0200 Subject: [PATCH 03/14] Fixed code comments from @freddygv * Renamed `checks.BufSize` to `checks.DefaultBufSize` * Use the constant where appropriate * Better error message when `check_output_max_size` has wrong value --- agent/agent.go | 2 +- agent/agent_endpoint.go | 4 +- agent/agent_endpoint_test.go | 4 +- agent/checks/check.go | 8 ++-- agent/checks/check_test.go | 26 +++++------ agent/config/builder.go | 5 ++- agent/config/default.go | 3 +- agent/config/runtime_test.go | 83 ++++++++++++++++++------------------ agent/consul/config.go | 3 +- 9 files changed, 71 insertions(+), 67 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index c96f3767c358..0ed64c82866d 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2245,7 +2245,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, if chkType != nil { maxOutputSize := a.config.CheckOutputMaxSize if maxOutputSize == 0 { - maxOutputSize = 4096 + maxOutputSize = checks.DefaultBufSize } if chkType.OutputMaxSize > 0 && maxOutputSize > chkType.OutputMaxSize { maxOutputSize = chkType.OutputMaxSize diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 229969df04b0..4d9b4fb05dc1 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -716,9 +716,9 @@ func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Reques } total := len(update.Output) - if total > checks.BufSize { + if total > checks.DefaultBufSize { update.Output = fmt.Sprintf("%s ... (captured %d of %d bytes)", - update.Output[:checks.BufSize], checks.BufSize, total) + update.Output[:checks.DefaultBufSize], checks.DefaultBufSize, total) } checkID := types.CheckID(strings.TrimPrefix(req.URL.Path, "/v1/agent/check/update/")) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 967147239f11..44f619809397 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -2340,7 +2340,7 @@ func TestAgent_UpdateCheck(t *testing.T) { t.Run("log output limit", func(t *testing.T) { args := checkUpdate{ Status: api.HealthPassing, - Output: strings.Repeat("-= bad -=", 5*checks.BufSize), + Output: strings.Repeat("-= bad -=", 5*checks.DefaultBufSize), } req, _ := http.NewRequest("PUT", "/v1/agent/check/update/test", jsonReader(args)) resp := httptest.NewRecorder() @@ -2359,7 +2359,7 @@ func TestAgent_UpdateCheck(t *testing.T) { // rough check that the output buffer was cut down so this test // isn't super brittle. state := a.State.Checks()["test"] - if state.Status != api.HealthPassing || len(state.Output) > 2*checks.BufSize { + if state.Status != api.HealthPassing || len(state.Output) > 2*checks.DefaultBufSize { t.Fatalf("bad: %v", state) } }) diff --git a/agent/checks/check.go b/agent/checks/check.go index 6abca5876a21..41d577928ee0 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -28,10 +28,10 @@ const ( // Otherwise we risk fork bombing a system. MinInterval = time.Second - // BufSize is the maximum size of the captured - // check output. Prevents an enormous buffer + // DefaultBufSize is the maximum size of the captured + // check output by defaut. Prevents an enormous buffer // from being captured - BufSize = 4 * 1024 // 4KB + DefaultBufSize = 4 * 1024 // 4KB // UserAgent is the value of the User-Agent header // for HTTP health checks. @@ -342,7 +342,7 @@ func (c *CheckHTTP) Start() { c.httpClient.Timeout = c.Interval } if c.OutputMaxSize < 1 { - c.OutputMaxSize = 4096 + c.OutputMaxSize = DefaultBufSize } } diff --git a/agent/checks/check_test.go b/agent/checks/check_test.go index 8d9c825ac26a..b61bbc19c89e 100644 --- a/agent/checks/check_test.go +++ b/agent/checks/check_test.go @@ -49,7 +49,7 @@ func TestCheckMonitor_Script(t *testing.T) { Script: tt.script, Interval: 25 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -85,7 +85,7 @@ func TestCheckMonitor_Args(t *testing.T) { ScriptArgs: tt.args, Interval: 25 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -111,7 +111,7 @@ func TestCheckMonitor_Timeout(t *testing.T) { Interval: 50 * time.Millisecond, Timeout: 25 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -136,7 +136,7 @@ func TestCheckMonitor_RandomStagger(t *testing.T) { ScriptArgs: []string{"sh", "-c", "exit 0"}, Interval: 25 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -162,7 +162,7 @@ func TestCheckMonitor_LimitOutput(t *testing.T) { ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"}, Interval: 25 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -170,7 +170,7 @@ func TestCheckMonitor_LimitOutput(t *testing.T) { time.Sleep(50 * time.Millisecond) // Allow for extra bytes for the truncation message - if len(notif.Output("foo")) > BufSize+100 { + if len(notif.Output("foo")) > DefaultBufSize+100 { t.Fatalf("output size is too long") } } @@ -292,7 +292,7 @@ func TestCheckHTTP(t *testing.T) { } // Body larger than 4k limit - body := bytes.Repeat([]byte{'a'}, 2*BufSize) + body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize) w.WriteHeader(tt.code) w.Write(body) })) @@ -304,7 +304,7 @@ func TestCheckHTTP(t *testing.T) { CheckID: types.CheckID("foo"), HTTP: server.URL, Method: tt.method, - OutputMaxSize: 4096, + OutputMaxSize: DefaultBufSize, Header: tt.header, Interval: 10 * time.Millisecond, Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), @@ -319,9 +319,9 @@ func TestCheckHTTP(t *testing.T) { if got, want := notif.State("foo"), tt.status; got != want { r.Fatalf("got state %q want %q", got, want) } - // Allow slightly more data than BufSize, for the header - if n := len(notif.Output("foo")); n > (BufSize + 256) { - r.Fatalf("output too long: %d (%d-byte limit)", n, BufSize) + // Allow slightly more data than DefaultBufSize, for the header + if n := len(notif.Output("foo")); n > (DefaultBufSize + 256) { + r.Fatalf("output too long: %d (%d-byte limit)", n, DefaultBufSize) } }) }) @@ -332,7 +332,7 @@ func TestCheckMaxOutputSize(t *testing.T) { t.Parallel() timeout := 5 * time.Millisecond server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, req *http.Request) { - body := bytes.Repeat([]byte{'x'}, 2*BufSize) + body := bytes.Repeat([]byte{'x'}, 2*DefaultBufSize) writer.WriteHeader(200) writer.Write(body) })) @@ -415,7 +415,7 @@ func TestCheckHTTP_disablesKeepAlives(t *testing.T) { func largeBodyHandler(code int) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Body larger than 4k limit - body := bytes.Repeat([]byte{'a'}, 2*BufSize) + body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize) w.WriteHeader(code) w.Write(body) }) diff --git a/agent/config/builder.go b/agent/config/builder.go index 3c8523f2d490..7a7a63e5164e 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -14,6 +14,7 @@ import ( "strings" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/structs" @@ -966,7 +967,7 @@ func (b *Builder) Validate(rt RuntimeConfig) error { return fmt.Errorf("'bootstrap_expect > 0' and 'bootstrap = true' are mutually exclusive") } if rt.CheckOutputMaxSize < 1 { - return fmt.Errorf("'check_output_max_size must be positive") + return fmt.Errorf("check_output_max_size must be positive, to discard check output use the discard_check_output flag") } if rt.AEInterval <= 0 { return fmt.Errorf("ae_interval cannot be %s. Must be positive", rt.AEInterval) @@ -1178,7 +1179,7 @@ func (b *Builder) checkVal(v *CheckDefinition) *structs.CheckDefinition { Timeout: b.durationVal(fmt.Sprintf("check[%s].timeout", id), v.Timeout), TTL: b.durationVal(fmt.Sprintf("check[%s].ttl", id), v.TTL), DeregisterCriticalServiceAfter: b.durationVal(fmt.Sprintf("check[%s].deregister_critical_service_after", id), v.DeregisterCriticalServiceAfter), - OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, 4096), + OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, checks.DefaultBufSize), } } diff --git a/agent/config/default.go b/agent/config/default.go index 10f6ec78e214..477c144355c4 100644 --- a/agent/config/default.go +++ b/agent/config/default.go @@ -4,6 +4,7 @@ import ( "fmt" "strconv" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/version" ) @@ -49,7 +50,7 @@ func DefaultSource() Source { bind_addr = "0.0.0.0" bootstrap = false bootstrap_expect = 0 - check_output_max_size = 4096 + check_output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` check_update_interval = "5m" client_addr = "127.0.0.1" datacenter = "` + consul.DefaultDC + `" diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 58322836a2f6..8db83356e8fc 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/sdk/testutil" @@ -2076,8 +2077,8 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}, OutputMaxSize: 4096}, - &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}, OutputMaxSize: 4096}, + &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}, OutputMaxSize: checks.DefaultBufSize}, + &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}, OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2095,7 +2096,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true, OutputMaxSize: 4096}, + &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true, OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2113,7 +2114,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", AliasService: "foo", OutputMaxSize: 4096}, + &structs.CheckDefinition{Name: "a", AliasService: "foo", OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2232,7 +2233,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { DockerContainerID: "z", DeregisterCriticalServiceAfter: 10 * time.Second, ScriptArgs: []string{"a", "b"}, - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, }, }, Weights: &structs.Weights{ @@ -2500,7 +2501,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { { TCP: "127.0.0.1:2345", Interval: 10 * time.Second, - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -2596,7 +2597,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { { TCP: "127.0.0.1:2345", Interval: 10 * time.Second, - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -3028,7 +3029,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": [ "RyuIdDWv", "QbxEcIUM" ] }, "method": "Dou0nGT5", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "tcp": "JY6fTTcw", "interval": "18714s", "docker_container_id": "qF66POS9", @@ -3055,7 +3056,7 @@ func TestFullConfig(t *testing.T) { "method": "aldrIQ4l", "tcp": "RJQND605", "interval": "22164s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ipgdFtjd", "shell": "qAeOYy0M", "tls_skip_verify": true, @@ -3079,7 +3080,7 @@ func TestFullConfig(t *testing.T) { "method": "gLrztrNw", "tcp": "4jG5casb", "interval": "28767s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "THW6u7rL", "shell": "C1Zt3Zwh", "tls_skip_verify": true, @@ -3280,7 +3281,7 @@ func TestFullConfig(t *testing.T) { "method": "9afLm3Mj", "tcp": "fjiLFqVd", "interval": "23926s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "dO5TtRHk", "shell": "e6q2ttES", "tls_skip_verify": true, @@ -3303,7 +3304,7 @@ func TestFullConfig(t *testing.T) { "method": "T66MFBfR", "tcp": "bNnNfx2A", "interval": "22224s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ipgdFtjd", "shell": "omVZq7Sz", "tls_skip_verify": true, @@ -3325,7 +3326,7 @@ func TestFullConfig(t *testing.T) { "method": "ciYHWors", "tcp": "FfvCwlqH", "interval": "12356s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "HBndBU6R", "shell": "hVI33JjA", "tls_skip_verify": true, @@ -3361,7 +3362,7 @@ func TestFullConfig(t *testing.T) { "method": "X5DrovFc", "tcp": "ICbxkpSF", "interval": "24392s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ZKXr68Yb", "shell": "CEfzx0Fo", "tls_skip_verify": true, @@ -3400,7 +3401,7 @@ func TestFullConfig(t *testing.T) { "method": "5wkAxCUE", "tcp": "MN3oA9D2", "interval": "32718s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "cU15LMet", "shell": "nEz9qz2l", "tls_skip_verify": true, @@ -3422,7 +3423,7 @@ func TestFullConfig(t *testing.T) { "method": "wzByP903", "tcp": "2exjZIGE", "interval": "5656s", - "output_max_size": 4096, + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "5tDBWpfA", "shell": "rlTpLM8s", "tls_skip_verify": true, @@ -3605,7 +3606,7 @@ func TestFullConfig(t *testing.T) { method = "Dou0nGT5" tcp = "JY6fTTcw" interval = "18714s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "qF66POS9" shell = "sOnDy228" tls_skip_verify = true @@ -3630,7 +3631,7 @@ func TestFullConfig(t *testing.T) { method = "aldrIQ4l" tcp = "RJQND605" interval = "22164s" - output_max_size = 4096, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `,, docker_container_id = "ipgdFtjd" shell = "qAeOYy0M" tls_skip_verify = true @@ -3654,7 +3655,7 @@ func TestFullConfig(t *testing.T) { method = "gLrztrNw" tcp = "4jG5casb" interval = "28767s" - output_max_size = 4096, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `,, docker_container_id = "THW6u7rL" shell = "C1Zt3Zwh" tls_skip_verify = true @@ -3880,7 +3881,7 @@ func TestFullConfig(t *testing.T) { method = "T66MFBfR" tcp = "bNnNfx2A" interval = "22224s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "ipgdFtjd" shell = "omVZq7Sz" tls_skip_verify = true @@ -3902,7 +3903,7 @@ func TestFullConfig(t *testing.T) { method = "ciYHWors" tcp = "FfvCwlqH" interval = "12356s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "HBndBU6R" shell = "hVI33JjA" tls_skip_verify = true @@ -3938,7 +3939,7 @@ func TestFullConfig(t *testing.T) { method = "X5DrovFc" tcp = "ICbxkpSF" interval = "24392s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "ZKXr68Yb" shell = "CEfzx0Fo" tls_skip_verify = true @@ -3977,7 +3978,7 @@ func TestFullConfig(t *testing.T) { method = "5wkAxCUE" tcp = "MN3oA9D2" interval = "32718s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "cU15LMet" shell = "nEz9qz2l" tls_skip_verify = true @@ -3999,7 +4000,7 @@ func TestFullConfig(t *testing.T) { method = "wzByP903" tcp = "2exjZIGE" interval = "5656s" - output_max_size = 4096 + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, docker_container_id = "5tDBWpfA" shell = "rlTpLM8s" tls_skip_verify = true @@ -4268,7 +4269,7 @@ func TestFullConfig(t *testing.T) { CAFile: "erA7T0PM", CAPath: "mQEN1Mfp", CertFile: "7s4QAzDk", - CheckOutputMaxSize: 4096, + CheckOutputMaxSize: checks.DefaultBufSize, Checks: []*structs.CheckDefinition{ &structs.CheckDefinition{ ID: "uAjE6m9Z", @@ -4286,7 +4287,7 @@ func TestFullConfig(t *testing.T) { Method: "aldrIQ4l", TCP: "RJQND605", Interval: 22164 * time.Second, - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, DockerContainerID: "ipgdFtjd", Shell: "qAeOYy0M", TLSSkipVerify: true, @@ -4308,7 +4309,7 @@ func TestFullConfig(t *testing.T) { "qxvdnSE9": []string{"6wBPUYdF", "YYh8wtSZ"}, }, Method: "gLrztrNw", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "4jG5casb", Interval: 28767 * time.Second, DockerContainerID: "THW6u7rL", @@ -4332,7 +4333,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": {"RyuIdDWv", "QbxEcIUM"}, }, Method: "Dou0nGT5", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "JY6fTTcw", Interval: 18714 * time.Second, DockerContainerID: "qF66POS9", @@ -4501,7 +4502,7 @@ func TestFullConfig(t *testing.T) { "cVFpko4u": {"gGqdEB6k", "9LsRo22u"}, }, Method: "X5DrovFc", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "ICbxkpSF", Interval: 24392 * time.Second, DockerContainerID: "ZKXr68Yb", @@ -4550,7 +4551,7 @@ func TestFullConfig(t *testing.T) { "1UJXjVrT": {"OJgxzTfk", "xZZrFsq7"}, }, Method: "5wkAxCUE", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "MN3oA9D2", Interval: 32718 * time.Second, DockerContainerID: "cU15LMet", @@ -4572,7 +4573,7 @@ func TestFullConfig(t *testing.T) { "vr7wY7CS": {"EtCoNPPL", "9vAarJ5s"}, }, Method: "wzByP903", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "2exjZIGE", Interval: 5656 * time.Second, DockerContainerID: "5tDBWpfA", @@ -4658,7 +4659,7 @@ func TestFullConfig(t *testing.T) { "SHOVq1Vv": {"jntFhyym", "GYJh32pp"}, }, Method: "T66MFBfR", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "bNnNfx2A", Interval: 22224 * time.Second, DockerContainerID: "ipgdFtjd", @@ -4680,7 +4681,7 @@ func TestFullConfig(t *testing.T) { "p2UI34Qz": {"UsG1D0Qh", "NHhRiB6s"}, }, Method: "ciYHWors", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "FfvCwlqH", Interval: 12356 * time.Second, DockerContainerID: "HBndBU6R", @@ -4702,7 +4703,7 @@ func TestFullConfig(t *testing.T) { "l4HwQ112": {"fk56MNlo", "dhLK56aZ"}, }, Method: "9afLm3Mj", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, TCP: "fjiLFqVd", Interval: 23926 * time.Second, DockerContainerID: "dO5TtRHk", @@ -5040,7 +5041,7 @@ func TestConfigDecodeBytes(t *testing.T) { func TestSanitize(t *testing.T) { rt := RuntimeConfig{ BindAddr: &net.IPAddr{IP: net.ParseIP("127.0.0.1")}, - CheckOutputMaxSize: 4096, + CheckOutputMaxSize: checks.DefaultBufSize, SerfAdvertiseAddrLAN: &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, DNSAddrs: []net.Addr{ &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, @@ -5064,7 +5065,7 @@ func TestSanitize(t *testing.T) { Token: "bar", Check: structs.CheckType{ Name: "blurb", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, }, Weights: &structs.Weights{ Passing: 67, @@ -5076,7 +5077,7 @@ func TestSanitize(t *testing.T) { &structs.CheckDefinition{ Name: "zoo", Token: "zope", - OutputMaxSize: 4096, + OutputMaxSize: checks.DefaultBufSize, }, }, } @@ -5116,7 +5117,7 @@ func TestSanitize(t *testing.T) { "CAPath": "", "CertFile": "", "CheckDeregisterIntervalMin": "0s", - "CheckOutputMaxSize": 4096, + "CheckOutputMaxSize": checks.DefaultBufSize, "CheckReapInterval": "0s", "CheckUpdateInterval": "0s", "Checks": [{ @@ -5133,7 +5134,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "zoo", "Notes": "", - "OutputMaxSize": 4096, + "OutputMaxSize": checks.DefaultBufSize, "ScriptArgs": [], "ServiceID": "", "Shell": "", @@ -5165,7 +5166,7 @@ func TestSanitize(t *testing.T) { "ConsulCoordinateUpdateMaxBatches": 0, "ConsulCoordinateUpdatePeriod": "15s", "ConsulRaftElectionTimeout": "0s", - "CheckOutputMaxSize": 4096, + "CheckOutputMaxSize": checks.DefaultBufSize, "ConsulRaftHeartbeatTimeout": "0s", "ConsulRaftLeaderLeaseTimeout": "0s", "GossipLANGossipInterval": "0s", @@ -5305,7 +5306,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "blurb", "Notes": "", - "OutputMaxSize": 4096, + "OutputMaxSize": checks.DefaultBufSize, "ScriptArgs": [], "Shell": "", "Status": "", diff --git a/agent/consul/config.go b/agent/consul/config.go index 5342df24c9a8..2a62b8c3dd7d 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -7,6 +7,7 @@ import ( "os" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" @@ -502,7 +503,7 @@ func DefaultConfig() *Config { CoordinateUpdateBatchSize: 128, CoordinateUpdateMaxBatches: 5, - CheckOutputMaxSize: 4096, + CheckOutputMaxSize: checks.DefaultBufSize, RPCRate: rate.Inf, RPCMaxBurst: 1000, From 79a02fa94dd116da4c6e3fd3d00d3812074c4109 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 22 May 2019 11:50:23 +0200 Subject: [PATCH 04/14] Added documentation for `OutputMaxSize` and `check_output_max_size` --- website/source/api/agent/check.html.md | 6 ++++++ website/source/docs/agent/options.html.md | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/website/source/api/agent/check.html.md b/website/source/api/agent/check.html.md index dfb04d853d04..69d91cc31733 100644 --- a/website/source/api/agent/check.html.md +++ b/website/source/api/agent/check.html.md @@ -183,6 +183,12 @@ The table below shows this endpoint's support for case of a Script, HTTP, TCP, or gRPC check. Can be specified in the form of "10s" or "5m" (i.e., 10 seconds or 5 minutes, respectively). +- `OutputMaxSize` `(positive int: 4096)` - Allow to put a maximum size of text + for the given check. This value must be between 0 and 4096, by default, the value + is 4k. + The value can be further limited for all checks of a given agent using the + `check_output_max_size` flag in the agent. + - `TLSSkipVerify` `(bool: false)` - Specifies if the certificate for an HTTPS check should not be verified. diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 5dadddf2a830..9f702fed6bd7 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -117,6 +117,14 @@ will exit with an error at startup. [go-sockaddr](https://godoc.org/github.com/hashicorp/go-sockaddr/template) template +* `-check_output_max_size - + Override the default limit of 4k for maximum size of checks, this is a value + between 1 and 4096, setting it to 0 is not allowed. + By limiting this size, it allows to put less pressure on Consul servers when + many checks are having a very large output in their checks. + In order to completely disable check output capture, it is possible to + use `discard_check_output`. + * `-client` - The address to which Consul will bind client interfaces, including the HTTP and DNS servers. By default, this is "127.0.0.1", allowing only loopback connections. In Consul From 8dcf910456ae3d3e78c437833bd6010c0537a8f0 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 22 May 2019 14:27:48 +0200 Subject: [PATCH 05/14] Fixed errors in HCL for some unit tests in `runtime_test.go` --- agent/config/runtime_test.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 8db83356e8fc..405d8a1406d1 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -3606,7 +3606,7 @@ func TestFullConfig(t *testing.T) { method = "Dou0nGT5" tcp = "JY6fTTcw" interval = "18714s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "qF66POS9" shell = "sOnDy228" tls_skip_verify = true @@ -3631,7 +3631,7 @@ func TestFullConfig(t *testing.T) { method = "aldrIQ4l" tcp = "RJQND605" interval = "22164s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `,, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ipgdFtjd" shell = "qAeOYy0M" tls_skip_verify = true @@ -3655,7 +3655,7 @@ func TestFullConfig(t *testing.T) { method = "gLrztrNw" tcp = "4jG5casb" interval = "28767s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `,, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "THW6u7rL" shell = "C1Zt3Zwh" tls_skip_verify = true @@ -3881,7 +3881,7 @@ func TestFullConfig(t *testing.T) { method = "T66MFBfR" tcp = "bNnNfx2A" interval = "22224s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ipgdFtjd" shell = "omVZq7Sz" tls_skip_verify = true @@ -3903,7 +3903,7 @@ func TestFullConfig(t *testing.T) { method = "ciYHWors" tcp = "FfvCwlqH" interval = "12356s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "HBndBU6R" shell = "hVI33JjA" tls_skip_verify = true @@ -3939,7 +3939,7 @@ func TestFullConfig(t *testing.T) { method = "X5DrovFc" tcp = "ICbxkpSF" interval = "24392s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ZKXr68Yb" shell = "CEfzx0Fo" tls_skip_verify = true @@ -3978,7 +3978,7 @@ func TestFullConfig(t *testing.T) { method = "5wkAxCUE" tcp = "MN3oA9D2" interval = "32718s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "cU15LMet" shell = "nEz9qz2l" tls_skip_verify = true @@ -4000,7 +4000,7 @@ func TestFullConfig(t *testing.T) { method = "wzByP903" tcp = "2exjZIGE" interval = "5656s" - output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `, + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "5tDBWpfA" shell = "rlTpLM8s" tls_skip_verify = true @@ -5117,7 +5117,7 @@ func TestSanitize(t *testing.T) { "CAPath": "", "CertFile": "", "CheckDeregisterIntervalMin": "0s", - "CheckOutputMaxSize": checks.DefaultBufSize, + "CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "CheckReapInterval": "0s", "CheckUpdateInterval": "0s", "Checks": [{ @@ -5134,7 +5134,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "zoo", "Notes": "", - "OutputMaxSize": checks.DefaultBufSize, + "OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ScriptArgs": [], "ServiceID": "", "Shell": "", @@ -5166,7 +5166,7 @@ func TestSanitize(t *testing.T) { "ConsulCoordinateUpdateMaxBatches": 0, "ConsulCoordinateUpdatePeriod": "15s", "ConsulRaftElectionTimeout": "0s", - "CheckOutputMaxSize": checks.DefaultBufSize, + "CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ConsulRaftHeartbeatTimeout": "0s", "ConsulRaftLeaderLeaseTimeout": "0s", "GossipLANGossipInterval": "0s", @@ -5306,7 +5306,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "blurb", "Notes": "", - "OutputMaxSize": checks.DefaultBufSize, + "OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ScriptArgs": [], "Shell": "", "Status": "", From bc0ace6a726dd277e786bb39b5ddfb6df954c77d Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Tue, 4 Jun 2019 18:05:04 +0200 Subject: [PATCH 06/14] Update website/source/docs/agent/options.html.md suggestion from @freddygv Co-Authored-By: Freddy --- website/source/docs/agent/options.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 9f702fed6bd7..bff3b8c49d59 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -117,7 +117,7 @@ will exit with an error at startup. [go-sockaddr](https://godoc.org/github.com/hashicorp/go-sockaddr/template) template -* `-check_output_max_size - +* `-check_output_max_size` - Override the default limit of 4k for maximum size of checks, this is a value between 1 and 4096, setting it to 0 is not allowed. By limiting this size, it allows to put less pressure on Consul servers when From a684019c0570fcd9dcd1ed05b42fdc710c5eecd9 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Tue, 4 Jun 2019 18:05:49 +0200 Subject: [PATCH 07/14] Update agent/config/flags.go suggestion from @freddygv Co-Authored-By: Freddy --- agent/config/flags.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/config/flags.go b/agent/config/flags.go index 30bd61d58006..a1ea99bbabac 100644 --- a/agent/config/flags.go +++ b/agent/config/flags.go @@ -60,7 +60,7 @@ func AddFlags(fs *flag.FlagSet, f *Flags) { add(&f.Config.Bootstrap, "bootstrap", "Sets server to bootstrap mode.") add(&f.Config.BootstrapExpect, "bootstrap-expect", "Sets server to expect bootstrap mode.") add(&f.Config.ClientAddr, "client", "Sets the address to bind for client access. This includes RPC, DNS, HTTP, HTTPS and gRPC (if configured).") - add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Set the maximum default size of output for checks on this agent") + add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Sets the maximum output size for checks on this agent") add(&f.ConfigFiles, "config-dir", "Path to a directory to read configuration files from. This will read every file ending in '.json' as configuration in this directory in alphabetical order. Can be specified multiple times.") add(&f.ConfigFiles, "config-file", "Path to a file in JSON or HCL format with a matching file extension. Can be specified multiple times.") add(&f.ConfigFormat, "config-format", "Config files are in this format irrespective of their extension. Must be 'hcl' or 'json'") From 89b5a23a82283ad5846972b25f3261c8e8dc16df Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 14 Jun 2019 16:35:26 +0200 Subject: [PATCH 08/14] Updated doc --- website/source/docs/agent/options.html.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index bff3b8c49d59..899fc72b0aba 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -118,8 +118,8 @@ will exit with an error at startup. template * `-check_output_max_size` - - Override the default limit of 4k for maximum size of checks, this is a value - between 1 and 4096, setting it to 0 is not allowed. + Override the default limit of 4k for maximum size of checks, this is a positive + value. By limiting this size, it allows to put less pressure on Consul servers when many checks are having a very large output in their checks. In order to completely disable check output capture, it is possible to From 0a95339c7bb3685104e2c4c2003ff5987ad7cbf8 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Mon, 24 Jun 2019 17:38:03 +0200 Subject: [PATCH 09/14] Ensure TTL HealthChecks are properly truncated everywhere --- agent/agent_endpoint.go | 7 ------- agent/checks/check.go | 11 ++++++++++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 4d9b4fb05dc1..e25f1ea6b197 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -19,7 +19,6 @@ import ( "github.com/hashicorp/consul/acl" cachetype "github.com/hashicorp/consul/agent/cache-types" - "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/debug" "github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/structs" @@ -715,12 +714,6 @@ func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Reques return nil, nil } - total := len(update.Output) - if total > checks.DefaultBufSize { - update.Output = fmt.Sprintf("%s ... (captured %d of %d bytes)", - update.Output[:checks.DefaultBufSize], checks.DefaultBufSize, total) - } - checkID := types.CheckID(strings.TrimPrefix(req.URL.Path, "/v1/agent/check/update/")) // Get the provided token, if any, and vet against any ACL policies. diff --git a/agent/checks/check.go b/agent/checks/check.go index 41d577928ee0..9997c0f807ca 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -223,12 +223,17 @@ type CheckTTL struct { stop bool stopCh chan struct{} stopLock sync.Mutex + + OutputMaxSize int } // Start is used to start a check ttl, runs until Stop() func (c *CheckTTL) Start() { c.stopLock.Lock() defer c.stopLock.Unlock() + if c.OutputMaxSize < 1 { + c.OutputMaxSize = DefaultBufSize + } c.stop = false c.stopCh = make(chan struct{}) c.timer = time.NewTimer(c.TTL) @@ -279,7 +284,11 @@ func (c *CheckTTL) getExpiredOutput() string { func (c *CheckTTL) SetStatus(status, output string) { c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status) c.Notify.UpdateCheck(c.CheckID, status, output) - + total := len(output) + if total > c.OutputMaxSize { + output = fmt.Sprintf("%s ... (captured %d of %d bytes)", + output[:c.OutputMaxSize], c.OutputMaxSize, total) + } // Store the last output so we can retain it if the TTL expires. c.lastOutputLock.Lock() c.lastOutput = output From 2b3df47a9d0e11eeff7958b6cbfd13d62b56c083 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Tue, 25 Jun 2019 00:12:15 +0200 Subject: [PATCH 10/14] Update website/source/api/agent/check.html.md suggestion from @freddygv Co-Authored-By: Freddy --- website/source/api/agent/check.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/api/agent/check.html.md b/website/source/api/agent/check.html.md index 69d91cc31733..a5ad23a5e40d 100644 --- a/website/source/api/agent/check.html.md +++ b/website/source/api/agent/check.html.md @@ -184,7 +184,7 @@ The table below shows this endpoint's support for or "5m" (i.e., 10 seconds or 5 minutes, respectively). - `OutputMaxSize` `(positive int: 4096)` - Allow to put a maximum size of text - for the given check. This value must be between 0 and 4096, by default, the value + for the given check. This value must be greater than 0, by default, the value is 4k. The value can be further limited for all checks of a given agent using the `check_output_max_size` flag in the agent. From a68ecd0fe7cfccc7fc8f585daa6c1b4d6dd9ad6c Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 26 Jun 2019 00:05:24 +0200 Subject: [PATCH 11/14] Properly truncate TTL checks Check that OutputMaxSize is properly taken into account. BUGFIX: properly update local state on update of TTL --- agent/agent.go | 21 ++++++++++++++------- agent/agent_endpoint_test.go | 2 +- agent/agent_test.go | 18 ++++++++++++++++-- agent/checks/check.go | 4 +++- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 0ed64c82866d..e2f0985ebe8b 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2259,10 +2259,11 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, } ttl := &checks.CheckTTL{ - Notify: a.State, - CheckID: check.CheckID, - TTL: chkType.TTL, - Logger: a.logger, + Notify: a.State, + CheckID: check.CheckID, + TTL: chkType.TTL, + Logger: a.logger, + OutputMaxSize: maxOutputSize, } // Restore persisted state, if any @@ -2878,8 +2879,14 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err } // Set the status through CheckTTL to reset the TTL. - check.SetStatus(status, output) - + outputTruncated := check.SetStatus(status, output) + stateCheck := a.State.Checks()[checkID] + if stateCheck != nil { + stateCheck.Output = outputTruncated + stateCheck.Status = status + } else { + a.logger.Printf("[WARN] agent: Unexpected missing state for check %v", checkID) + } // We don't write any files in dev mode so bail here. if a.config.DataDir == "" { return nil @@ -2887,7 +2894,7 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err // Persist the state so the TTL check can come up in a good state after // an agent restart, especially with long TTL values. - if err := a.persistCheckState(check, status, output); err != nil { + if err := a.persistCheckState(check, status, outputTruncated); err != nil { return fmt.Errorf("failed persisting state for check %q: %s", checkID, err) } diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 44f619809397..18238dbe5f8a 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -2360,7 +2360,7 @@ func TestAgent_UpdateCheck(t *testing.T) { // isn't super brittle. state := a.State.Checks()["test"] if state.Status != api.HealthPassing || len(state.Output) > 2*checks.DefaultBufSize { - t.Fatalf("bad: %v", state) + t.Fatalf("bad: %v, (len:=%d)", state, len(state.Output)) } }) diff --git a/agent/agent_test.go b/agent/agent_test.go index fecd1c208d12..226ee32355bc 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1405,7 +1405,7 @@ func TestAgent_updateTTLCheck(t *testing.T) { t.Parallel() a := NewTestAgent(t, t.Name(), "") defer a.Shutdown() - + checkBufSize := 100 health := &structs.HealthCheck{ Node: "foo", CheckID: "mem", @@ -1413,7 +1413,8 @@ func TestAgent_updateTTLCheck(t *testing.T) { Status: api.HealthCritical, } chk := &structs.CheckType{ - TTL: 15 * time.Second, + TTL: 15 * time.Second, + OutputMaxSize: checkBufSize, } // Add check and update it. @@ -1433,6 +1434,19 @@ func TestAgent_updateTTLCheck(t *testing.T) { if status.Output != "foo" { t.Fatalf("bad: %v", status) } + + if err := a.updateTTLCheck("mem", api.HealthCritical, strings.Repeat("--bad-- ", 5*checkBufSize)); err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure we have a check mapping. + status = a.State.Checks()["mem"] + if status.Status != api.HealthCritical { + t.Fatalf("bad: %v", status) + } + if len(status.Output) > checkBufSize*2 { + t.Fatalf("bad: %v", len(status.Output)) + } } func TestAgent_PersistService(t *testing.T) { diff --git a/agent/checks/check.go b/agent/checks/check.go index 9997c0f807ca..8b6c4c971b3a 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -281,7 +281,8 @@ func (c *CheckTTL) getExpiredOutput() string { // SetStatus is used to update the status of the check, // and to renew the TTL. If expired, TTL is restarted. -func (c *CheckTTL) SetStatus(status, output string) { +// output is returned (might be truncated) +func (c *CheckTTL) SetStatus(status, output string) string { c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status) c.Notify.UpdateCheck(c.CheckID, status, output) total := len(output) @@ -295,6 +296,7 @@ func (c *CheckTTL) SetStatus(status, output string) { c.lastOutputLock.Unlock() c.timer.Reset(c.TTL) + return output } // CheckHTTP is used to periodically make an HTTP request to From acce4eb08d98f19da58e3117e8dc14b4bdcaf56a Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 26 Jun 2019 01:03:23 +0200 Subject: [PATCH 12/14] No silent error when state of check TTL does not exist in agent --- agent/agent.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index e2f0985ebe8b..205d089f1fca 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2877,16 +2877,14 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err if !ok { return fmt.Errorf("CheckID %q does not have associated TTL", checkID) } - + stateCheck, ok := a.State.Checks()[checkID] + if !ok { + return fmt.Errorf("CheckID %q does not have a state check TTL", checkID) + } // Set the status through CheckTTL to reset the TTL. outputTruncated := check.SetStatus(status, output) - stateCheck := a.State.Checks()[checkID] - if stateCheck != nil { - stateCheck.Output = outputTruncated - stateCheck.Status = status - } else { - a.logger.Printf("[WARN] agent: Unexpected missing state for check %v", checkID) - } + stateCheck.Output = outputTruncated + stateCheck.Status = status // We don't write any files in dev mode so bail here. if a.config.DataDir == "" { return nil From 13710da19f9146ab3cb1460fc9d3e07b1e81ce80 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 26 Jun 2019 01:20:56 +0200 Subject: [PATCH 13/14] Check that check_output_max_size is enforced properly --- agent/agent_endpoint_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 18238dbe5f8a..98516f05004c 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -18,7 +18,6 @@ import ( "time" "github.com/hashicorp/consul/acl" - "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/debug" @@ -2299,7 +2298,8 @@ func TestAgent_FailCheck_ACLDeny(t *testing.T) { func TestAgent_UpdateCheck(t *testing.T) { t.Parallel() - a := NewTestAgent(t, t.Name(), "") + maxChecksSize := 256 + a := NewTestAgent(t, t.Name(), fmt.Sprintf("check_output_max_size=%d", maxChecksSize)) defer a.Shutdown() testrpc.WaitForTestAgent(t, a.RPC, "dc1") @@ -2340,7 +2340,7 @@ func TestAgent_UpdateCheck(t *testing.T) { t.Run("log output limit", func(t *testing.T) { args := checkUpdate{ Status: api.HealthPassing, - Output: strings.Repeat("-= bad -=", 5*checks.DefaultBufSize), + Output: strings.Repeat("-= bad -=", 5*maxChecksSize), } req, _ := http.NewRequest("PUT", "/v1/agent/check/update/test", jsonReader(args)) resp := httptest.NewRecorder() @@ -2359,7 +2359,7 @@ func TestAgent_UpdateCheck(t *testing.T) { // rough check that the output buffer was cut down so this test // isn't super brittle. state := a.State.Checks()["test"] - if state.Status != api.HealthPassing || len(state.Output) > 2*checks.DefaultBufSize { + if state.Status != api.HealthPassing || len(state.Output) > 2*maxChecksSize { t.Fatalf("bad: %v, (len:=%d)", state, len(state.Output)) } }) From 84781804aaa53891d8febe97edc6f1de6b0df552 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 26 Jun 2019 09:58:49 +0200 Subject: [PATCH 14/14] Truncation of TTL check at proper place aka before notification --- agent/agent.go | 8 ++------ agent/checks/check.go | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 205d089f1fca..6e877e7ab766 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2877,14 +2877,10 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err if !ok { return fmt.Errorf("CheckID %q does not have associated TTL", checkID) } - stateCheck, ok := a.State.Checks()[checkID] - if !ok { - return fmt.Errorf("CheckID %q does not have a state check TTL", checkID) - } + // Set the status through CheckTTL to reset the TTL. outputTruncated := check.SetStatus(status, output) - stateCheck.Output = outputTruncated - stateCheck.Status = status + // We don't write any files in dev mode so bail here. if a.config.DataDir == "" { return nil diff --git a/agent/checks/check.go b/agent/checks/check.go index 8b6c4c971b3a..f61a68e519d3 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -284,12 +284,12 @@ func (c *CheckTTL) getExpiredOutput() string { // output is returned (might be truncated) func (c *CheckTTL) SetStatus(status, output string) string { c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status) - c.Notify.UpdateCheck(c.CheckID, status, output) total := len(output) if total > c.OutputMaxSize { output = fmt.Sprintf("%s ... (captured %d of %d bytes)", output[:c.OutputMaxSize], c.OutputMaxSize, total) } + c.Notify.UpdateCheck(c.CheckID, status, output) // Store the last output so we can retain it if the TTL expires. c.lastOutputLock.Lock() c.lastOutput = output