Skip to content

Commit

Permalink
Allow ignoring checks by ID when defining a PreparedQuery. Fixes #3727.
Browse files Browse the repository at this point in the history
  • Loading branch information
banks committed Apr 10, 2018
1 parent 464bad0 commit 0d8993e
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 8 deletions.
15 changes: 15 additions & 0 deletions agent/consul/prepared_query/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"testing"

"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/types"
"github.com/mitchellh/copystructure"
)

Expand All @@ -32,6 +33,15 @@ var (
"${agent.segment}",
},
},
IgnoreCheckIDs: []types.CheckID{
"${name.full}",
"${name.prefix}",
"${name.suffix}",
"${match(0)}",
"${match(1)}",
"${match(2)}",
"${agent.segment}",
},
Tags: []string{
"${name.full}",
"${name.prefix}",
Expand Down Expand Up @@ -124,6 +134,7 @@ func TestTemplate_Compile(t *testing.T) {
query.Template.Type = structs.QueryTemplateTypeNamePrefixMatch
query.Template.Regexp = "^(hello)there$"
query.Service.Service = "${name.full}"
query.Service.IgnoreCheckIDs = []types.CheckID{"${match(1)}", "${agent.segment}"}
query.Service.Tags = []string{"${match(1)}", "${agent.segment}"}
backup, err := copystructure.Copy(query)
if err != nil {
Expand Down Expand Up @@ -151,6 +162,10 @@ func TestTemplate_Compile(t *testing.T) {
},
Service: structs.ServiceQuery{
Service: "hellothere",
IgnoreCheckIDs: []types.CheckID{
"hello",
"segment-foo",
},
Tags: []string{
"hello",
"segment-foo",
Expand Down
3 changes: 2 additions & 1 deletion agent/consul/prepared_query_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,8 @@ func (p *PreparedQuery) execute(query *structs.PreparedQuery,
}

// Filter out any unhealthy nodes.
nodes = nodes.Filter(query.Service.OnlyPassing)
nodes = nodes.FilterIgnore(query.Service.OnlyPassing,
query.Service.IgnoreCheckIDs)

// Apply the node metadata filters, if any.
if len(query.Service.NodeMeta) > 0 {
Expand Down
36 changes: 36 additions & 0 deletions agent/consul/prepared_query_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/testrpc"
"github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/serf/coordinate"
)
Expand Down Expand Up @@ -2076,6 +2077,41 @@ func TestPreparedQuery_Execute(t *testing.T) {
}
}

// Make the query ignore all our health checks (which have "failing" ID
// implicitly from their name).
query.Query.Service.IgnoreCheckIDs = []types.CheckID{"failing"}
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID); err != nil {
t.Fatalf("err: %v", err)
}

// We should end up with 10 nodes again
{
req := structs.PreparedQueryExecuteRequest{
Datacenter: "dc1",
QueryIDOrName: query.Query.ID,
QueryOptions: structs.QueryOptions{Token: execToken},
}

var reply structs.PreparedQueryExecuteResponse
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Execute", &req, &reply); err != nil {
t.Fatalf("err: %v", err)
}

if len(reply.Nodes) != 10 ||
reply.Datacenter != "dc1" ||
reply.Service != query.Query.Service.Service ||
!reflect.DeepEqual(reply.DNS, query.Query.DNS) ||
!reply.QueryMeta.KnownLeader {
t.Fatalf("bad: %v", reply)
}
}

// Undo that so all the following tests aren't broken!
query.Query.Service.IgnoreCheckIDs = nil
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID); err != nil {
t.Fatalf("err: %v", err)
}

// Make the query more picky by adding a tag filter. This just proves we
// call into the tag filter, it is tested more thoroughly in a separate
// test.
Expand Down
15 changes: 9 additions & 6 deletions agent/prepared_query_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"testing"

"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/types"
)

// MockPreparedQuery is a fake endpoint that we inject into the Consul server
Expand Down Expand Up @@ -87,9 +88,10 @@ func TestPreparedQuery_Create(t *testing.T) {
NearestN: 4,
Datacenters: []string{"dc1", "dc2"},
},
OnlyPassing: true,
Tags: []string{"foo", "bar"},
NodeMeta: map[string]string{"somekey": "somevalue"},
IgnoreCheckIDs: []types.CheckID{"broken_check"},
OnlyPassing: true,
Tags: []string{"foo", "bar"},
NodeMeta: map[string]string{"somekey": "somevalue"},
},
DNS: structs.QueryDNSOptions{
TTL: "10s",
Expand Down Expand Up @@ -122,9 +124,10 @@ func TestPreparedQuery_Create(t *testing.T) {
"NearestN": 4,
"Datacenters": []string{"dc1", "dc2"},
},
"OnlyPassing": true,
"Tags": []string{"foo", "bar"},
"NodeMeta": map[string]string{"somekey": "somevalue"},
"IgnoreCheckIDs": []string{"broken_check"},
"OnlyPassing": true,
"Tags": []string{"foo", "bar"},
"NodeMeta": map[string]string{"somekey": "somevalue"},
},
"DNS": map[string]interface{}{
"TTL": "10s",
Expand Down
8 changes: 8 additions & 0 deletions agent/structs/prepared_query.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package structs

import "github.com/hashicorp/consul/types"

// QueryDatacenterOptions sets options about how we fail over if there are no
// healthy nodes in the local datacenter.
type QueryDatacenterOptions struct {
Expand Down Expand Up @@ -34,6 +36,12 @@ type ServiceQuery struct {
// discarded)
OnlyPassing bool

// IgnoreCheckIDs is an optional list of health check IDs to ignore when
// considering which nodes are healthy. It is useful as an emergency measure
// to temporarily override some health check that is producing false negatives
// for example.
IgnoreCheckIDs []types.CheckID

// Near allows the query to always prefer the node nearest the given
// node. If the node does not exist, results are returned in their
// normal randomly-shuffled order. Supplying the magic "_agent" value
Expand Down
17 changes: 17 additions & 0 deletions agent/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,16 +580,33 @@ func (nodes CheckServiceNodes) Shuffle() {
// check if that option is selected). Note that this returns the filtered
// results AND modifies the receiver for performance.
func (nodes CheckServiceNodes) Filter(onlyPassing bool) CheckServiceNodes {
return nodes.FilterIgnore(onlyPassing, nil)
}

// FilterIgnore removes nodes that are failing health checks just like Filter.
// It also ignores the status of any check with an ID present in ignoreCheckIDs
// as if that check didn't exist. Note that this returns the filtered results
// AND modifies the receiver for performance.
func (nodes CheckServiceNodes) FilterIgnore(onlyPassing bool,
ignoreCheckIDs []types.CheckID) CheckServiceNodes {
n := len(nodes)
OUTER:
for i := 0; i < n; i++ {
node := nodes[i]
INNER:
for _, check := range node.Checks {
for _, ignore := range ignoreCheckIDs {
if check.CheckID == ignore {
// Skip this _check_ but keep looking at other checks for this node.
continue INNER
}
}
if check.Status == api.HealthCritical ||
(onlyPassing && check.Status != api.HealthPassing) {
nodes[i], nodes[n-1] = nodes[n-1], CheckServiceNode{}
n--
i--
// Skip this _node_ now we've swapped it off the end of the list.
continue OUTER
}
}
Expand Down
34 changes: 34 additions & 0 deletions agent/structs/structs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,20 @@ func TestStructs_CheckServiceNodes_Filter(t *testing.T) {
},
},
},
CheckServiceNode{
Node: &Node{
Node: "node4",
Address: "127.0.0.4",
},
Checks: HealthChecks{
// This check has a different ID to the others to ensure it is not
// ignored by accident
&HealthCheck{
CheckID: "failing2",
Status: api.HealthCritical,
},
},
},
}

// Test the case where warnings are allowed.
Expand Down Expand Up @@ -473,6 +487,26 @@ func TestStructs_CheckServiceNodes_Filter(t *testing.T) {
t.Fatalf("bad: %v", filtered)
}
}

// Allow failing checks to be ignored (note that the test checks have empty
// CheckID which is valid).
{
twiddle := make(CheckServiceNodes, len(nodes))
if n := copy(twiddle, nodes); n != len(nodes) {
t.Fatalf("bad: %d", n)
}
filtered := twiddle.FilterIgnore(true, []types.CheckID{""})
expected := CheckServiceNodes{
nodes[0],
nodes[1],
nodes[2], // Node 3's critical check should be ignored.
// Node 4 should still be failing since it's got a critical check with a
// non-ignored ID.
}
if !reflect.DeepEqual(filtered, expected) {
t.Fatalf("bad: %v", filtered)
}
}
}

func TestStructs_DirEntry_Clone(t *testing.T) {
Expand Down
6 changes: 6 additions & 0 deletions api/prepared_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ type ServiceQuery struct {
// local datacenter.
Failover QueryDatacenterOptions

// IgnoreCheckIDs is an optional list of health check IDs to ignore when
// considering which nodes are healthy. It is useful as an emergency measure
// to temporarily override some health check that is producing false negatives
// for example.
IgnoreCheckIDs []string

// If OnlyPassing is true then we will only include nodes with passing
// health checks (critical AND warning checks will cause a node to be
// discarded)
Expand Down
47 changes: 47 additions & 0 deletions api/prepared_query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,53 @@ func TestAPI_PreparedQuery(t *testing.T) {
t.Fatalf("bad datacenter: %v", results)
}

// Add new node with failing health check.
reg2 := reg
reg2.Node = "failingnode"
reg2.Check = &AgentCheck{
Node: "failingnode",
ServiceID: "redis1",
ServiceName: "redis",
Name: "failingcheck",
Status: "critical",
}
retry.Run(t, func(r *retry.R) {
if _, err := catalog.Register(reg2, nil); err != nil {
r.Fatal(err)
}
if _, _, err := catalog.Node("failingnode", nil); err != nil {
r.Fatal(err)
}
})

// Execute by ID. Should return only healthy node.
results, _, err = query.Execute(def.ID, nil)
if err != nil {
t.Fatalf("err: %s", err)
}
if len(results.Nodes) != 1 || results.Nodes[0].Node.Node != "foobar" {
t.Fatalf("bad: %v", results)
}
if wan, ok := results.Nodes[0].Node.TaggedAddresses["wan"]; !ok || wan != "127.0.0.1" {
t.Fatalf("bad: %v", results)
}

// Update PQ with ignore rule for the failing check
def.Service.IgnoreCheckIDs = []string{"failingcheck"}
_, err = query.Update(def, nil)
if err != nil {
t.Fatalf("err: %s", err)
}

// Execute by ID. Should return BOTH nodes ignoring the failing check.
results, _, err = query.Execute(def.ID, nil)
if err != nil {
t.Fatalf("err: %s", err)
}
if len(results.Nodes) != 2 {
t.Fatalf("got %d nodes, want 2", len(results.Nodes))
}

// Delete it.
_, err = query.Delete(def.ID, nil)
if err != nil {
Expand Down
9 changes: 8 additions & 1 deletion website/source/api/query.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ section for more details about how prepared queries work with Consul's ACL syste
### Prepared Query Templates

Consul 0.6.4 and later support prepared query templates. These are created
similar to static templates, except with some additional fields and features.
similar to static queries, except with some additional fields and features.
Here is an example prepared query template:

```json
Expand Down Expand Up @@ -206,6 +206,13 @@ The table below shows this endpoint's support for
failover, even if it is selected by both `NearestN` and is listed in
`Datacenters`.

- `IgnoreCheckIDs` `(array<string>: nil)` - Specifies a list of check IDs that
should be ignored when filtering unhealthy instances. This is mostly useful
in an emergency or as a temporary measure when a health check is found to be
unreliable. Being able to ignore it in centrally-defined queries can be
simpler than de-registering the check as an interim solution until the check
can be fixed.

- `OnlyPassing` `(bool: false)` - Specifies the behavior of the query's health
check filtering. If this is set to false, the results will include nodes
with checks in the passing as well as the warning states. If this is set to
Expand Down

0 comments on commit 0d8993e

Please sign in to comment.