From 87c85191b39f658d5fbe8ea21ea026509d89a2dd Mon Sep 17 00:00:00 2001 From: James Phillips Date: Sat, 15 Jul 2017 00:15:12 -0700 Subject: [PATCH] Adds new config to make script checks opt-in, updates documentation. Fixes #3087. --- agent/agent.go | 11 +++++-- agent/agent_test.go | 31 +++++++++++++++++++ agent/config.go | 7 +++++ agent/config_test.go | 5 +++ agent/consul/structs/check_type.go | 5 +++ agent/testagent.go | 1 + command/agent.go | 1 + testutil/server.go | 4 ++- website/source/docs/agent/checks.html.md | 28 +++++++++++------ website/source/docs/agent/options.html.md | 9 ++++++ website/source/docs/guides/acl.html.md | 8 +++++ .../source/intro/getting-started/join.html.md | 4 +-- 12 files changed, 99 insertions(+), 15 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index e81b91fc31b1..7ba7379a88f1 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1595,8 +1595,15 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, if check.CheckID == "" { return fmt.Errorf("CheckID missing") } - if chkType != nil && !chkType.Valid() { - return fmt.Errorf("Check type is not valid") + + if chkType != nil { + if !chkType.Valid() { + return fmt.Errorf("Check type is not valid") + } + + if chkType.IsExec() && !a.config.CheckEnableExec { + return fmt.Errorf("Check types that exec scripts are disabled on this agent") + } } if check.ServiceID != "" { diff --git a/agent/agent_test.go b/agent/agent_test.go index 97f772cf4126..457b232f4f39 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -797,6 +797,37 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) { } } +func TestAgent_AddCheck_ExecDisable(t *testing.T) { + t.Parallel() + + cfg := TestConfig() + cfg.CheckEnableExec = false + + a := NewTestAgent(t.Name(), cfg) + defer a.Shutdown() + + health := &structs.HealthCheck{ + Node: "foo", + CheckID: "mem", + Name: "memory util", + Status: api.HealthCritical, + } + chk := &structs.CheckType{ + Script: "exit 0", + Interval: 15 * 
time.Second, + } + err := a.AddCheck(health, chk, false, "") + if err == nil || !strings.Contains(err.Error(), "exec scripts are disabled on this agent") { + t.Fatalf("err: %v", err) + } + + // Ensure we don't have a check mapping + _, ok := a.state.Checks()["mem"] + if ok { + t.Fatalf("should be missing mem check") + } +} + func TestAgent_RemoveCheck(t *testing.T) { t.Parallel() a := NewTestAgent(t.Name(), nil) diff --git a/agent/config.go b/agent/config.go index 34506c6712a0..ef9a9ffea250 100644 --- a/agent/config.go +++ b/agent/config.go @@ -625,6 +625,10 @@ type Config struct { // true, we ignore the leave, and rejoin the cluster on start. RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"` + // CheckEnableExec controls whether health checks which execute scripts + // are enabled. This includes regular script checks and Docker checks. + CheckEnableExec bool `mapstructure:"check_enable_exec"` + // CheckUpdateInterval controls the interval on which the output of a health check // is updated if there is no change to the state. 
For example, a check in a steady // state may run every 5 second generating a unique output (timestamp, etc), forcing @@ -1932,6 +1936,9 @@ func MergeConfig(a, b *Config) *Config { if b.DNSConfig.RecursorTimeout != 0 { result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout } + if b.CheckEnableExec { + result.CheckEnableExec = true + } if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 { result.CheckUpdateInterval = b.CheckUpdateInterval } diff --git a/agent/config_test.go b/agent/config_test.go index e4f0b16aab72..b83c2c1c0180 100644 --- a/agent/config_test.go +++ b/agent/config_test.go @@ -217,6 +217,10 @@ func TestDecodeConfig(t *testing.T) { in: `{"ca_path":"a"}`, c: &Config{CAPath: "a"}, }, + { + in: `{"check_enable_exec":true}`, + c: &Config{CheckEnableExec: true}, + }, { in: `{"check_update_interval":"2s"}`, c: &Config{CheckUpdateInterval: 2 * time.Second, CheckUpdateIntervalRaw: "2s"}, @@ -1363,6 +1367,7 @@ func TestMergeConfig(t *testing.T) { ReconnectTimeoutLan: 24 * time.Hour, ReconnectTimeoutWanRaw: "36h", ReconnectTimeoutWan: 36 * time.Hour, + CheckEnableExec: true, CheckUpdateInterval: 8 * time.Minute, CheckUpdateIntervalRaw: "8m", ACLToken: "1111", diff --git a/agent/consul/structs/check_type.go b/agent/consul/structs/check_type.go index 07e7007b6184..5ea9f8197894 100644 --- a/agent/consul/structs/check_type.go +++ b/agent/consul/structs/check_type.go @@ -47,6 +47,11 @@ func (c *CheckType) Valid() bool { return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker() } +// IsExec checks if this is a check that execs some kind of script. 
+func (c *CheckType) IsExec() bool { + return c.Script != "" +} + // IsTTL checks if this is a TTL type func (c *CheckType) IsTTL() bool { return c.TTL != 0 diff --git a/agent/testagent.go b/agent/testagent.go index cf5d98c4db77..91f24ff4f4a5 100644 --- a/agent/testagent.go +++ b/agent/testagent.go @@ -314,6 +314,7 @@ func TestConfig() *Config { cfg.Datacenter = "dc1" cfg.Bootstrap = true cfg.Server = true + cfg.CheckEnableExec = true ccfg := consul.DefaultConfig() cfg.ConsulConfig = ccfg diff --git a/command/agent.go b/command/agent.go index 3a0aeb4d2037..222b7e69d001 100644 --- a/command/agent.go +++ b/command/agent.go @@ -80,6 +80,7 @@ func (cmd *AgentCommand) readConfig() *agent.Config { "A unique ID for this node across space and time. Defaults to a randomly-generated ID"+ " that persists in the data-dir.") + f.BoolVar(&cmdCfg.CheckEnableExec, "check-enable-exec", false, "Enables health check scripts.") var disableHostNodeID configutil.BoolValue f.Var(&disableHostNodeID, "disable-host-node-id", "Setting this to true will prevent Consul from using information from the"+ diff --git a/testutil/server.go b/testutil/server.go index 3cb959a47af4..aead8def381d 100644 --- a/testutil/server.go +++ b/testutil/server.go @@ -86,6 +86,7 @@ type TestServerConfig struct { VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"` VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"` VerifyOutgoing bool `json:"verify_outgoing,omitempty"` + CheckEnableExec bool `json:"check_enable_exec,omitempty"` ReadyTimeout time.Duration `json:"-"` Stdout, Stderr io.Writer `json:"-"` Args []string `json:"-"` @@ -124,7 +125,8 @@ func defaultServerConfig() *TestServerConfig { Server: randomPort(), RPC: randomPort(), }, - ReadyTimeout: 10 * time.Second, + CheckEnableExec: true, + ReadyTimeout: 10 * time.Second, } } diff --git a/website/source/docs/agent/checks.html.md b/website/source/docs/agent/checks.html.md index b4676fed3f7c..0f17822f6404 100644 --- 
a/website/source/docs/agent/checks.html.md +++ b/website/source/docs/agent/checks.html.md @@ -24,7 +24,9 @@ There are five different kinds of checks: a script check is limited to 4K. Output larger than this will be truncated. By default, Script checks will be configured with a timeout equal to 30 seconds. It is possible to configure a custom Script check timeout value by specifying the - `timeout` field in the check definition. + `timeout` field in the check definition. In Consul 0.9.0 and later, the agent + must be configured with [`check_enable_exec`](/docs/agent/options.html#_check_enable_exec) + set to `true` in order to enable script checks. * HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g. every 30 seconds) to the specified URL. The status of the service depends on @@ -74,15 +76,17 @@ There are five different kinds of checks: valid through the end of the TTL from the time of the last check. * Docker + Interval - These checks depend on invoking an external application which -is packaged within a Docker Container. The application is triggered within the running -container via the Docker Exec API. We expect that the Consul agent user has access -to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to -determine the Docker API endpoint. The application is expected to run, perform a health -check of the service running inside the container, and exit with an appropriate exit code. -The check should be paired with an invocation interval. The shell on which the check -has to be performed is configurable which makes it possible to run containers which -have different shells on the same host. Check output for Docker is limited to -4K. Any output larger than this will be truncated. + is packaged within a Docker Container. The application is triggered within the running + container via the Docker Exec API. We expect that the Consul agent user has access + to either the Docker HTTP API or the unix socket. 
Consul uses ```$DOCKER_HOST``` to + determine the Docker API endpoint. The application is expected to run, perform a health + check of the service running inside the container, and exit with an appropriate exit code. + The check should be paired with an invocation interval. The shell on which the check + has to be performed is configurable which makes it possible to run containers which + have different shells on the same host. Check output for Docker is limited to + 4K. Any output larger than this will be truncated. In Consul 0.9.0 and later, the agent + must be configured with [`check_enable_exec`](/docs/agent/options.html#_check_enable_exec) + set to `true` in order to enable Docker checks. ## Check Definition @@ -210,6 +214,10 @@ This is the only convention that Consul depends on. Any output of the script will be captured and stored in the `notes` field so that it can be viewed by human operators. +In Consul 0.9.0 and later, the agent must be configured with +[`check_enable_exec`](/docs/agent/options.html#_check_enable_exec) set to `true` +in order to enable script checks. + ## Initial Health Check Status By default, when checks are registered against a Consul agent, the state is set diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 88e8df4b3839..40596ec86278 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -96,6 +96,12 @@ will exit with an error at startup. This is an IP address that should be reachable by all other LAN nodes in the cluster. By default, the value follows the same rules as [`-bind` command-line flag](#_bind), and if this is not specified, the `-bind` option is used. This is available in Consul 0.7.1 and later. +* `-check-enable-exec` - This + controls whether [health checks that execute scripts](/docs/agent/checks.html) are enabled on + this agent, and defaults to `false` so operators must opt-in to allowing these. 
If enabled, + it is recommended to [enable ACLs](/docs/guides/acl.html) as well to control which users are + allowed to register new checks to execute scripts. This was added in Consul 0.9.0. + * `-client` - The address to which Consul will bind client interfaces, including the HTTP and DNS servers. By default, this is "127.0.0.1", allowing only loopback connections. @@ -606,6 +612,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass PEM-encoded certificate. The certificate is provided to clients or servers to verify the agent's authenticity. It must be provided along with [`key_file`](#key_file). +* `check_enable_exec` Equivalent to the + [`-check-enable-exec` command-line flag](#_check_enable_exec). + * `check_update_interval` This interval controls how often check output from checks in a steady state is synchronized with the server. By default, this is diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index a2794fcd33bc..1554e1c41505 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -684,6 +684,10 @@ to use for registration events: [checks](/docs/agent/checks.html). Tokens may also be passed to the [HTTP API](/api/index.html) for operations that require them. +In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with +[`check_enable_exec`](/docs/agent/options.html#_check_enable_exec) set to `true` in order to enable +script checks. + #### Operator Rules The `operator` policy controls access to cluster-level operations in the @@ -866,6 +870,10 @@ to use for registration events: [checks](/docs/agent/checks.html). Tokens may also be passed to the [HTTP API](/api/index.html) for operations that require them. +In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with +[`check_enable_exec`](/docs/agent/options.html#_check_enable_exec) set to `true` in order to enable +script checks. 
+ #### Session Rules The `session` policy controls access to [Session API](/api/session.html) operations. diff --git a/website/source/intro/getting-started/join.html.md b/website/source/intro/getting-started/join.html.md index 9169e87a4fea..71f40e51b709 100644 --- a/website/source/intro/getting-started/join.html.md +++ b/website/source/intro/getting-started/join.html.md @@ -81,7 +81,7 @@ All together, these settings yield a ```text vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \ -data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \ - -config-dir=/etc/consul.d + -check-enable-exec=true -config-dir=/etc/consul.d ... ``` @@ -102,7 +102,7 @@ All together, these settings yield a ```text vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \ - -bind=172.20.20.11 -config-dir=/etc/consul.d + -bind=172.20.20.11 -check-enable-exec=true -config-dir=/etc/consul.d ... ```