Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds new config to make script checks opt-in, updates documentation. #3284

Merged
merged 7 commits into from
Jul 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1595,8 +1595,15 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
if check.CheckID == "" {
return fmt.Errorf("CheckID missing")
}
if chkType != nil && !chkType.Valid() {
return fmt.Errorf("Check type is not valid")

if chkType != nil {
if !chkType.Valid() {
return fmt.Errorf("Check type is not valid")
}

if chkType.IsScript() && !a.config.EnableScriptChecks {
return fmt.Errorf("Check types that exec scripts are disabled on this agent")
}
}

if check.ServiceID != "" {
Expand Down
49 changes: 44 additions & 5 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,9 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) {

func TestAgent_AddCheck(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), nil)
cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown()

health := &structs.HealthCheck{
Expand Down Expand Up @@ -665,7 +667,9 @@ func TestAgent_AddCheck(t *testing.T) {

func TestAgent_AddCheck_StartPassing(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), nil)
cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown()

health := &structs.HealthCheck{
Expand Down Expand Up @@ -702,7 +706,9 @@ func TestAgent_AddCheck_StartPassing(t *testing.T) {

func TestAgent_AddCheck_MinInterval(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), nil)
cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown()

health := &structs.HealthCheck{
Expand Down Expand Up @@ -735,7 +741,9 @@ func TestAgent_AddCheck_MinInterval(t *testing.T) {

func TestAgent_AddCheck_MissingService(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), nil)
cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown()

health := &structs.HealthCheck{
Expand Down Expand Up @@ -797,11 +805,40 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) {
}
}

func TestAgent_RemoveCheck(t *testing.T) {
// TestAgent_AddCheck_ExecDisable verifies that AddCheck refuses a script
// check on an agent that has not had EnableScriptChecks turned on, and that
// the rejected check is not left behind in the agent's local state.
func TestAgent_AddCheck_ExecDisable(t *testing.T) {
	t.Parallel()

	// No config override is supplied, so script checks are not explicitly
	// enabled for this agent.
	a := NewTestAgent(t.Name(), nil)
	defer a.Shutdown()

	scriptChk := &structs.CheckType{
		Script:   "exit 0",
		Interval: 15 * time.Second,
	}
	hc := &structs.HealthCheck{
		Node:    "foo",
		CheckID: "mem",
		Name:    "memory util",
		Status:  api.HealthCritical,
	}

	// Registration must fail, and specifically because script execution is
	// disabled rather than for some unrelated validation reason.
	if err := a.AddCheck(hc, scriptChk, false, ""); err == nil {
		t.Fatalf("err: %v", err)
	} else if !strings.Contains(err.Error(), "exec scripts are disabled on this agent") {
		t.Fatalf("err: %v", err)
	}

	// Ensure we don't have a check mapping
	registered := a.state.Checks()["mem"]
	if registered != nil {
		t.Fatalf("should be missing mem check")
	}
}

func TestAgent_RemoveCheck(t *testing.T) {
t.Parallel()
cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown()

// Remove check that doesn't exist
if err := a.RemoveCheck("mem", false); err != nil {
t.Fatalf("err: %v", err)
Expand Down Expand Up @@ -1097,6 +1134,7 @@ func TestAgent_PersistCheck(t *testing.T) {
cfg := TestConfig()
cfg.Server = false
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer os.RemoveAll(cfg.DataDir)
defer a.Shutdown()
Expand Down Expand Up @@ -1230,6 +1268,7 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
cfg := TestConfig()
cfg.Server = false
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer os.RemoveAll(cfg.DataDir)
defer a.Shutdown()
Expand Down
8 changes: 8 additions & 0 deletions agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,11 @@ type Config struct {
// true, we ignore the leave, and rejoin the cluster on start.
RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"`

// EnableScriptChecks controls whether health checks which execute
// scripts are enabled. This includes regular script checks and Docker
// checks.
EnableScriptChecks bool `mapstructure:"enable_script_checks"`

// CheckUpdateInterval controls the interval on which the output of a health check
// is updated if there is no change to the state. For example, a check in a steady
// state may run every 5 second generating a unique output (timestamp, etc), forcing
Expand Down Expand Up @@ -1932,6 +1937,9 @@ func MergeConfig(a, b *Config) *Config {
if b.DNSConfig.RecursorTimeout != 0 {
result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout
}
if b.EnableScriptChecks {
result.EnableScriptChecks = true
}
if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 {
result.CheckUpdateInterval = b.CheckUpdateInterval
}
Expand Down
5 changes: 5 additions & 0 deletions agent/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ func TestDecodeConfig(t *testing.T) {
in: `{"disable_keyring_file":true}`,
c: &Config{DisableKeyringFile: true},
},
{
in: `{"enable_script_checks":true}`,
c: &Config{EnableScriptChecks: true},
},
{
in: `{"encrypt_verify_incoming":true}`,
c: &Config{EncryptVerifyIncoming: Bool(true)},
Expand Down Expand Up @@ -1363,6 +1367,7 @@ func TestMergeConfig(t *testing.T) {
ReconnectTimeoutLan: 24 * time.Hour,
ReconnectTimeoutWanRaw: "36h",
ReconnectTimeoutWan: 36 * time.Hour,
EnableScriptChecks: true,
CheckUpdateInterval: 8 * time.Minute,
CheckUpdateIntervalRaw: "8m",
ACLToken: "1111",
Expand Down
5 changes: 5 additions & 0 deletions agent/consul/structs/check_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ func (c *CheckType) Valid() bool {
return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker()
}

// IsScript reports whether this check execs some kind of script, which is
// the case whenever its Script field is non-empty.
func (c *CheckType) IsScript() bool {
	return len(c.Script) > 0
}

// IsTTL checks if this is a TTL type
func (c *CheckType) IsTTL() bool {
return c.TTL != 0
Expand Down
4 changes: 3 additions & 1 deletion api/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,9 @@ func TestAPI_AgentChecks_serviceBound(t *testing.T) {

func TestAPI_AgentChecks_Docker(t *testing.T) {
t.Parallel()
c, s := makeClient(t)
c, s := makeClientWithConfig(t, nil, func(c *testutil.TestServerConfig) {
c.EnableScriptChecks = true
})
defer s.Stop()

agent := c.Agent()
Expand Down
1 change: 1 addition & 0 deletions command/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func (cmd *AgentCommand) readConfig() *agent.Config {
"A unique ID for this node across space and time. Defaults to a randomly-generated ID"+
" that persists in the data-dir.")

f.BoolVar(&cmdCfg.EnableScriptChecks, "enable-script-checks", false, "Enables health check scripts.")
var disableHostNodeID configutil.BoolValue
f.Var(&disableHostNodeID, "disable-host-node-id",
"Setting this to true will prevent Consul from using information from the"+
Expand Down
1 change: 1 addition & 0 deletions testutil/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ type TestServerConfig struct {
VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"`
VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"`
VerifyOutgoing bool `json:"verify_outgoing,omitempty"`
EnableScriptChecks bool `json:"enable_script_checks,omitempty"`
ReadyTimeout time.Duration `json:"-"`
Stdout, Stderr io.Writer `json:"-"`
Args []string `json:"-"`
Expand Down
32 changes: 20 additions & 12 deletions website/source/docs/agent/checks.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ There are five different kinds of checks:
that performs the health check, exits with an appropriate exit code, and potentially
generates some output. A script is paired with an invocation interval (e.g.
every 30 seconds). This is similar to the Nagios plugin system. The output of
a script check is limited to 4K. Output larger than this will be truncated.
a script check is limited to 4KB. Output larger than this will be truncated.
By default, Script checks will be configured with a timeout equal to 30 seconds.
It is possible to configure a custom Script check timeout value by specifying the
`timeout` field in the check definition.
`timeout` field in the check definition. In Consul 0.9.0 and later, the agent
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
set to `true` in order to enable script checks.

* HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g.
every 30 seconds) to the specified URL. The status of the service depends on
Expand All @@ -38,7 +40,7 @@ There are five different kinds of checks:
configured with a request timeout equal to the check interval, with a max of
10 seconds. It is possible to configure a custom HTTP check timeout value by
specifying the `timeout` field in the check definition. The output of the
check is limited to roughly 4K. Responses larger than this will be truncated.
check is limited to roughly 4KB. Responses larger than this will be truncated.
HTTP checks also support SSL. By default, a valid SSL certificate is expected.
Certificate verification can be turned off by setting the `tls_skip_verify`
field to `true` in the check definition.
Expand Down Expand Up @@ -74,15 +76,17 @@ There are five different kinds of checks:
valid through the end of the TTL from the time of the last check.

* Docker + Interval - These checks depend on invoking an external application which
is packaged within a Docker Container. The application is triggered within the running
container via the Docker Exec API. We expect that the Consul agent user has access
to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to
determine the Docker API endpoint. The application is expected to run, perform a health
check of the service running inside the container, and exit with an appropriate exit code.
The check should be paired with an invocation interval. The shell on which the check
has to be performed is configurable which makes it possible to run containers which
have different shells on the same host. Check output for Docker is limited to
4K. Any output larger than this will be truncated.
is packaged within a Docker Container. The application is triggered within the running
container via the Docker Exec API. We expect that the Consul agent user has access
to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to
determine the Docker API endpoint. The application is expected to run, perform a health
check of the service running inside the container, and exit with an appropriate exit code.
The check should be paired with an invocation interval. The shell on which the check
has to be performed is configurable which makes it possible to run containers which
have different shells on the same host. Check output for Docker is limited to
4KB. Any output larger than this will be truncated. In Consul 0.9.0 and later, the agent
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
set to `true` in order to enable Docker health checks.

## Check Definition

Expand Down Expand Up @@ -210,6 +214,10 @@ This is the only convention that Consul depends on. Any output of the script
will be captured and stored in the `notes` field so that it can be viewed
by human operators.

In Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true`
in order to enable script checks.

## Initial Health Check Status

By default, when checks are registered against a Consul agent, the state is set
Expand Down
17 changes: 13 additions & 4 deletions website/source/docs/agent/options.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,23 @@ will exit with an error at startup.
[Nomad](https://www.nomadproject.io/), so if you opt-in to host-based IDs then Consul and Nomad will use
information on the host to automatically assign the same ID in both systems.

* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
`-encrypt` key will be available on startup. This defaults to false.

* <a name="_dns_port"></a><a href="#_dns_port">`-dns-port`</a> - the DNS port to listen on.
This overrides the default port 8600. This is available in Consul 0.7 and later.

* <a name="_domain"></a><a href="#_domain">`-domain`</a> - By default, Consul responds to DNS queries
in the "consul." domain. This flag can be used to change that domain. All queries in this domain
are assumed to be handled by Consul and will not be recursively resolved.

* <a name="_enable_script_checks"></a><a href="#_enable_script_checks">`-enable-script-checks`</a> - This
controls whether [health checks that execute scripts](/docs/agent/checks.html) are enabled on
this agent, and defaults to `false` so operators must opt-in to allowing these. If enabled,
it is recommended to [enable ACLs](/docs/guides/acl.html) as well to control which users are
allowed to register new checks to execute scripts. This was added in Consul 0.9.0.

* <a name="_encrypt"></a><a href="#_encrypt">`-encrypt`</a> - Specifies the secret key to
use for encryption of Consul
network traffic. This key must be 16-bytes that are Base64-encoded. The
Expand All @@ -167,10 +177,6 @@ will exit with an error at startup.
initialized with an encryption key, then the provided key is ignored and
a warning will be displayed.

* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
`-encrypt` key will be available on startup. This defaults to false.

* <a name="_http_port"></a><a href="#_http_port">`-http-port`</a> - the HTTP API port to listen on.
This overrides the default port 8500. This option is very useful when deploying Consul
to an environment which communicates the HTTP port through the environment e.g. PaaS like CloudFoundry, allowing
Expand Down Expand Up @@ -712,6 +718,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* <a name="enable_debug"></a><a href="#enable_debug">`enable_debug`</a> When set, enables some
additional debugging features. Currently, this is only used to set the runtime profiling HTTP endpoints.

* <a name="enable_script_checks"></a><a href="#enable_script_checks">`enable_script_checks`</a> Equivalent to the
[`-enable-script-checks` command-line flag](#_enable_script_checks).

* <a name="enable_syslog"></a><a href="#enable_syslog">`enable_syslog`</a> Equivalent to
the [`-syslog` command-line flag](#_syslog).

Expand Down
8 changes: 8 additions & 0 deletions website/source/docs/guides/acl.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,10 @@ to use for registration events:
[checks](/docs/agent/checks.html). Tokens may also be passed to the
[HTTP API](/api/index.html) for operations that require them.

In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
script checks.

#### Operator Rules

The `operator` policy controls access to cluster-level operations in the
Expand Down Expand Up @@ -866,6 +870,10 @@ to use for registration events:
[checks](/docs/agent/checks.html). Tokens may also be passed to the
[HTTP API](/api/index.html) for operations that require them.

In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
script checks.

#### Session Rules

The `session` policy controls access to [Session API](/api/session.html) operations.
Expand Down
10 changes: 8 additions & 2 deletions website/source/intro/getting-started/join.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ the replicated log until the expected number of servers has successfully joined.
You can read more about this in the [bootstrapping
guide](/docs/guides/bootstrapping.html).

We've included the [`-enable-script-checks`](/docs/agent/options.html#_enable_script_checks)
flag set to `true` in order to enable health checks that can execute external scripts.
This will be used in examples later. For production use, you'd want to configure
[ACLs](/docs/guides/acl.html) in conjunction with this to control the ability to
register arbitrary scripts.

Finally, we add the [`config-dir` flag](/docs/agent/options.html#_config_dir),
marking where service and check definitions can be found.

Expand All @@ -81,7 +87,7 @@ All together, these settings yield a
```text
vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \
-data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \
-config-dir=/etc/consul.d
-enable-script-checks=true -config-dir=/etc/consul.d
...
```

Expand All @@ -102,7 +108,7 @@ All together, these settings yield a

```text
vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \
-bind=172.20.20.11 -config-dir=/etc/consul.d
-bind=172.20.20.11 -enable-script-checks=true -config-dir=/etc/consul.d
...
```

Expand Down