Skip to content

Commit

Permalink
Merge pull request #625 from hashicorp/f-maintcmd
Browse files Browse the repository at this point in the history
New "maint" command
  • Loading branch information
armon committed Jan 22, 2015
2 parents 85841cc + 5049a5e commit e1a5d53
Show file tree
Hide file tree
Showing 12 changed files with 574 additions and 29 deletions.
6 changes: 4 additions & 2 deletions api/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,10 @@ func (a *Agent) ForceLeave(node string) error {

// EnableServiceMaintenance toggles service maintenance mode on
// for the given service ID.
func (a *Agent) EnableServiceMaintenance(serviceID string) error {
func (a *Agent) EnableServiceMaintenance(serviceID, reason string) error {
r := a.c.newRequest("PUT", "/v1/agent/service/maintenance/"+serviceID)
r.params.Set("enable", "true")
r.params.Set("reason", reason)
_, resp, err := requireOK(a.c.doRequest(r))
if err != nil {
return err
Expand All @@ -304,9 +305,10 @@ func (a *Agent) DisableServiceMaintenance(serviceID string) error {

// EnableNodeMaintenance toggles node maintenance mode on for the
// agent we are connected to.
func (a *Agent) EnableNodeMaintenance() error {
func (a *Agent) EnableNodeMaintenance(reason string) error {
r := a.c.newRequest("PUT", "/v1/agent/maintenance")
r.params.Set("enable", "true")
r.params.Set("reason", reason)
_, resp, err := requireOK(a.c.doRequest(r))
if err != nil {
return err
Expand Down
8 changes: 4 additions & 4 deletions api/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func TestServiceMaintenance(t *testing.T) {
}

// Enable maintenance mode
if err := agent.EnableServiceMaintenance("redis"); err != nil {
if err := agent.EnableServiceMaintenance("redis", "broken"); err != nil {
t.Fatalf("err: %s", err)
}

Expand All @@ -331,7 +331,7 @@ func TestServiceMaintenance(t *testing.T) {
for _, check := range checks {
if strings.Contains(check.CheckID, "maintenance") {
found = true
if check.Status != "critical" {
if check.Status != "critical" || check.Notes != "broken" {
t.Fatalf("bad: %#v", checks)
}
}
Expand Down Expand Up @@ -364,7 +364,7 @@ func TestNodeMaintenance(t *testing.T) {
agent := c.Agent()

// Enable maintenance mode
if err := agent.EnableNodeMaintenance(); err != nil {
if err := agent.EnableNodeMaintenance("broken"); err != nil {
t.Fatalf("err: %s", err)
}

Expand All @@ -377,7 +377,7 @@ func TestNodeMaintenance(t *testing.T) {
for _, check := range checks {
if strings.Contains(check.CheckID, "maintenance") {
found = true
if check.Status != "critical" {
if check.Status != "critical" || check.Notes != "broken" {
t.Fatalf("bad: %#v", checks)
}
}
Expand Down
32 changes: 24 additions & 8 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ const (
// The ID of the faux health checks for maintenance mode
serviceMaintCheckPrefix = "_service_maintenance"
nodeMaintCheckID = "_node_maintenance"

// Default reasons for node/service maintenance mode
defaultNodeMaintReason = "Maintenance mode is enabled for this node, " +
"but no reason was provided. This is a default message."
defaultServiceMaintReason = "Maintenance mode is enabled for this " +
"service, but no reason was provided. This is a default message."
)

/*
Expand Down Expand Up @@ -1020,7 +1026,7 @@ func serviceMaintCheckID(serviceID string) string {

// EnableServiceMaintenance will register a false health check against the given
// service ID with critical status. This will exclude the service from queries.
func (a *Agent) EnableServiceMaintenance(serviceID string) error {
func (a *Agent) EnableServiceMaintenance(serviceID, reason string) error {
service, ok := a.state.Services()[serviceID]
if !ok {
return fmt.Errorf("No service registered with ID %q", serviceID)
Expand All @@ -1032,18 +1038,23 @@ func (a *Agent) EnableServiceMaintenance(serviceID string) error {
return nil
}

// Use default notes if no reason provided
if reason == "" {
reason = defaultServiceMaintReason
}

// Create and register the critical health check
check := &structs.HealthCheck{
Node: a.config.NodeName,
CheckID: checkID,
Name: "Service Maintenance Mode",
Notes: "Maintenance mode is enabled for this service",
Notes: reason,
ServiceID: service.ID,
ServiceName: service.Service,
Status: structs.HealthCritical,
}
a.AddCheck(check, nil, true)
a.logger.Printf("[INFO] agent: service %q entered maintenance mode", serviceID)
a.logger.Printf("[INFO] agent: Service %q entered maintenance mode", serviceID)

return nil
}
Expand All @@ -1063,28 +1074,33 @@ func (a *Agent) DisableServiceMaintenance(serviceID string) error {

// Deregister the maintenance check
a.RemoveCheck(checkID, true)
a.logger.Printf("[INFO] agent: service %q left maintenance mode", serviceID)
a.logger.Printf("[INFO] agent: Service %q left maintenance mode", serviceID)

return nil
}

// EnableNodeMaintenance places a node into maintenance mode.
func (a *Agent) EnableNodeMaintenance() {
func (a *Agent) EnableNodeMaintenance(reason string) {
// Ensure node maintenance is not already enabled
if _, ok := a.state.Checks()[nodeMaintCheckID]; ok {
return
}

// Use a default notes value
if reason == "" {
reason = defaultNodeMaintReason
}

// Create and register the node maintenance check
check := &structs.HealthCheck{
Node: a.config.NodeName,
CheckID: nodeMaintCheckID,
Name: "Node Maintenance Mode",
Notes: "Maintenance mode is enabled for this node",
Notes: reason,
Status: structs.HealthCritical,
}
a.AddCheck(check, nil, true)
a.logger.Printf("[INFO] agent: node entered maintenance mode")
a.logger.Printf("[INFO] agent: Node entered maintenance mode")
}

// DisableNodeMaintenance removes a node from maintenance mode
Expand All @@ -1093,5 +1109,5 @@ func (a *Agent) DisableNodeMaintenance() {
return
}
a.RemoveCheck(nodeMaintCheckID, true)
a.logger.Printf("[INFO] agent: node left maintenance mode")
a.logger.Printf("[INFO] agent: Node left maintenance mode")
}
10 changes: 7 additions & 3 deletions command/agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,17 +220,21 @@ func (s *HTTPServer) AgentServiceMaintenance(resp http.ResponseWriter, req *http
}

if enable {
if err = s.agent.EnableServiceMaintenance(serviceID); err != nil {
reason := params.Get("reason")
if err = s.agent.EnableServiceMaintenance(serviceID, reason); err != nil {
resp.WriteHeader(404)
resp.Write([]byte(err.Error()))
return nil, nil
}
} else {
if err = s.agent.DisableServiceMaintenance(serviceID); err != nil {
resp.WriteHeader(404)
resp.Write([]byte(err.Error()))
return nil, nil
}
}
return nil, err

return nil, nil
}

func (s *HTTPServer) AgentNodeMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
Expand All @@ -257,7 +261,7 @@ func (s *HTTPServer) AgentNodeMaintenance(resp http.ResponseWriter, req *http.Re
}

if enable {
s.agent.EnableNodeMaintenance()
s.agent.EnableNodeMaintenance(params.Get("reason"))
} else {
s.agent.DisableNodeMaintenance()
}
Expand Down
29 changes: 21 additions & 8 deletions command/agent/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,8 @@ func TestHTTPAgent_ServiceMaintenanceEndpoint_BadRequest(t *testing.T) {
// Fails when bad service ID provided
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/_nope_?enable=true", nil)
resp = httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err == nil {
t.Fatalf("should have errored")
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 404 {
t.Fatalf("expected 404, got %d", resp.Code)
Expand All @@ -566,7 +566,7 @@ func TestHTTPAgent_EnableServiceMaintenance(t *testing.T) {
}

// Force the service into maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=true", nil)
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=true&reason=broken", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
Expand All @@ -577,9 +577,15 @@ func TestHTTPAgent_EnableServiceMaintenance(t *testing.T) {

// Ensure the maintenance check was registered
checkID := serviceMaintCheckID("test")
if _, ok := srv.agent.state.Checks()[checkID]; !ok {
check, ok := srv.agent.state.Checks()[checkID]
if !ok {
t.Fatalf("should have registered maintenance check")
}

// Ensure the reason was set in notes
if check.Notes != "broken" {
t.Fatalf("bad: %#v", check)
}
}

func TestHTTPAgent_DisableServiceMaintenance(t *testing.T) {
Expand All @@ -598,7 +604,7 @@ func TestHTTPAgent_DisableServiceMaintenance(t *testing.T) {
}

// Force the service into maintenance mode
if err := srv.agent.EnableServiceMaintenance("test"); err != nil {
if err := srv.agent.EnableServiceMaintenance("test", ""); err != nil {
t.Fatalf("err: %s", err)
}

Expand Down Expand Up @@ -653,7 +659,8 @@ func TestHTTPAgent_EnableNodeMaintenance(t *testing.T) {
defer srv.agent.Shutdown()

// Force the node into maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=true", nil)
req, _ := http.NewRequest(
"PUT", "/v1/agent/self/maintenance?enable=true&reason=broken", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
Expand All @@ -663,9 +670,15 @@ func TestHTTPAgent_EnableNodeMaintenance(t *testing.T) {
}

// Ensure the maintenance check was registered
if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; !ok {
check, ok := srv.agent.state.Checks()[nodeMaintCheckID]
if !ok {
t.Fatalf("should have registered maintenance check")
}

// Ensure the reason was set in notes
if check.Notes != "broken" {
t.Fatalf("bad: %#v", check)
}
}

func TestHTTPAgent_DisableNodeMaintenance(t *testing.T) {
Expand All @@ -675,7 +688,7 @@ func TestHTTPAgent_DisableNodeMaintenance(t *testing.T) {
defer srv.agent.Shutdown()

// Force the node into maintenance mode
srv.agent.EnableNodeMaintenance()
srv.agent.EnableNodeMaintenance("")

// Leave maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=false", nil)
Expand Down
46 changes: 42 additions & 4 deletions command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -916,16 +916,22 @@ func TestAgent_ServiceMaintenanceMode(t *testing.T) {
}

// Enter maintenance mode for the service
if err := agent.EnableServiceMaintenance("redis"); err != nil {
if err := agent.EnableServiceMaintenance("redis", "broken"); err != nil {
t.Fatalf("err: %s", err)
}

// Make sure the critical health check was added
checkID := serviceMaintCheckID("redis")
if _, ok := agent.state.Checks()[checkID]; !ok {
check, ok := agent.state.Checks()[checkID]
if !ok {
t.Fatalf("should have registered critical maintenance check")
}

// Ensure the reason was set in notes
if check.Notes != "broken" {
t.Fatalf("bad: %#v", check)
}

// Leave maintenance mode
if err := agent.DisableServiceMaintenance("redis"); err != nil {
t.Fatalf("err: %s", err)
Expand All @@ -935,6 +941,20 @@ func TestAgent_ServiceMaintenanceMode(t *testing.T) {
if _, ok := agent.state.Checks()[checkID]; ok {
t.Fatalf("should have deregistered maintenance check")
}

// Enter service maintenance mode without providing a reason
if err := agent.EnableServiceMaintenance("redis", ""); err != nil {
t.Fatalf("err: %s", err)
}

// Ensure the check was registered with the default notes
check, ok = agent.state.Checks()[checkID]
if !ok {
t.Fatalf("should have registered critical check")
}
if check.Notes != defaultServiceMaintReason {
t.Fatalf("bad: %#v", check)
}
}

func TestAgent_NodeMaintenanceMode(t *testing.T) {
Expand All @@ -944,18 +964,36 @@ func TestAgent_NodeMaintenanceMode(t *testing.T) {
defer agent.Shutdown()

// Enter maintenance mode for the node
agent.EnableNodeMaintenance()
agent.EnableNodeMaintenance("broken")

// Make sure the critical health check was added
if _, ok := agent.state.Checks()[nodeMaintCheckID]; !ok {
check, ok := agent.state.Checks()[nodeMaintCheckID]
if !ok {
t.Fatalf("should have registered critical node check")
}

// Ensure the reason was set in notes
if check.Notes != "broken" {
t.Fatalf("bad: %#v", check)
}

// Leave maintenance mode
agent.DisableNodeMaintenance()

// Ensure the check was deregistered
if _, ok := agent.state.Checks()[nodeMaintCheckID]; ok {
t.Fatalf("should have deregistered critical node check")
}

// Enter maintenance mode without passing a reason
agent.EnableNodeMaintenance("")

// Make sure the check was registered with the default note
check, ok = agent.state.Checks()[nodeMaintCheckID]
if !ok {
t.Fatalf("should have registered critical node check")
}
if check.Notes != defaultNodeMaintReason {
t.Fatalf("bad: %#v", check)
}
}
Loading

0 comments on commit e1a5d53

Please sign in to comment.