Skip to content

Commit

Permalink
Merge pull request #3683 from hashicorp/check-logging-redux
Browse files Browse the repository at this point in the history
Cleans up check logging.
  • Loading branch information
slackpad authored Nov 10, 2017
2 parents b797f46 + 990fbbb commit aab5d26
Showing 1 changed file with 23 additions and 30 deletions.
53 changes: 23 additions & 30 deletions agent/checks/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,13 @@ func (c *CheckMonitor) check() {
// Create the command
var cmd *osexec.Cmd
var err error
var cmdDisplay string
if len(c.ScriptArgs) > 0 {
cmdDisplay = fmt.Sprintf("%v", c.ScriptArgs)
cmd, err = exec.Subprocess(c.ScriptArgs)
} else {
cmdDisplay = c.Script
cmd, err = exec.Script(c.Script)
}
if err != nil {
c.Logger.Printf("[ERR] agent: failed to setup invoke '%s': %s", cmdDisplay, err)
c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
Expand All @@ -129,14 +126,13 @@ func (c *CheckMonitor) check() {
outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
output.Size(), output.TotalWritten(), outputStr)
}
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s",
c.CheckID, cmdDisplay, outputStr)
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr)
return outputStr
}

// Start the check
if err := cmd.Start(); err != nil {
c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", cmdDisplay, err)
c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
Expand All @@ -154,11 +150,11 @@ func (c *CheckMonitor) check() {
select {
case <-time.After(timeout):
if err := exec.KillCommandSubtree(cmd); err != nil {
c.Logger.Printf("[WARN] Failed to kill check '%s' after timeout: %v", cmdDisplay, err)
c.Logger.Printf("[WARN] Check %q failed to kill after timeout: %s", c.CheckID, err)
}

msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
c.Logger.Printf("[WARN] %s '%s'", msg, cmdDisplay)
c.Logger.Printf("[WARN] Check %q: %s", c.CheckID, msg)

outputStr := truncateAndLogOutput()
if len(outputStr) > 0 {
Expand All @@ -178,7 +174,7 @@ func (c *CheckMonitor) check() {
// Check if the check passed
outputStr := truncateAndLogOutput()
if err == nil {
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
return
}
Expand All @@ -189,15 +185,15 @@ func (c *CheckMonitor) check() {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
code := status.ExitStatus()
if code == 1 {
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
return
}
}
}

// Set the health as critical
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
}

Expand Down Expand Up @@ -247,7 +243,7 @@ func (c *CheckTTL) run() {
for {
select {
case <-c.timer.C:
c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical",
c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical",
c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())

Expand All @@ -273,8 +269,7 @@ func (c *CheckTTL) getExpiredOutput() string {
// SetStatus is used to update the status of the check,
// and to renew the TTL. If expired, TTL is restarted.
func (c *CheckTTL) SetStatus(status, output string) {
c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v",
c.CheckID, status)
c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
c.Notify.UpdateCheck(c.CheckID, status, output)

// Store the last output so we can retain it if the TTL expires.
Expand Down Expand Up @@ -358,7 +353,6 @@ func (c *CheckHTTP) Stop() {
func (c *CheckHTTP) run() {
// Get the randomized initial pause time
initialPauseTime := lib.RandomStagger(c.Interval)
c.Logger.Printf("[DEBUG] agent: pausing %v before first HTTP request of %s", initialPauseTime, c.HTTP)
next := time.After(initialPauseTime)
for {
select {
Expand All @@ -380,7 +374,7 @@ func (c *CheckHTTP) check() {

req, err := http.NewRequest(method, c.HTTP, nil)
if err != nil {
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
Expand All @@ -405,7 +399,7 @@ func (c *CheckHTTP) check() {

resp, err := c.httpClient.Do(req)
if err != nil {
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
Expand All @@ -414,27 +408,27 @@ func (c *CheckHTTP) check() {
// Read the response into a circular buffer to limit the size
output, _ := circbuf.NewBuffer(BufSize)
if _, err := io.Copy(output, resp.Body); err != nil {
c.Logger.Printf("[WARN] agent: Check '%v': Get error while reading body: %s", c.CheckID, err)
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
}

// Format the response body
result := fmt.Sprintf("HTTP GET %s: %s Output: %s", c.HTTP, resp.Status, output.String())

if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
// PASSING (2xx)
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)

} else if resp.StatusCode == 429 {
// WARNING
// 429 Too Many Requests (RFC 6585)
// The user has sent too many requests in a given amount of time.
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)

} else {
// CRITICAL
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
}
}
Expand Down Expand Up @@ -496,7 +490,6 @@ func (c *CheckTCP) Stop() {
func (c *CheckTCP) run() {
// Get the randomized initial pause time
initialPauseTime := lib.RandomStagger(c.Interval)
c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
next := time.After(initialPauseTime)
for {
select {
Expand All @@ -513,12 +506,12 @@ func (c *CheckTCP) run() {
func (c *CheckTCP) check() {
conn, err := c.dialer.Dial(`tcp`, c.TCP)
if err != nil {
c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
conn.Close()
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
}

Expand Down Expand Up @@ -585,7 +578,7 @@ func (c *CheckDocker) check() {
var out string
status, b, err := c.doCheck()
if err != nil {
c.Logger.Printf("[DEBUG] agent: Check '%s': %s", c.CheckID, err)
c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err)
out = err.Error()
} else {
// out is already limited to CheckBufSize since we're getting a
Expand All @@ -595,11 +588,11 @@ func (c *CheckDocker) check() {
if int(b.TotalWritten()) > len(out) {
out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
}
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", c.CheckID, c.Script, out)
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out)
}

if status == api.HealthCritical {
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
}

c.Notify.UpdateCheck(c.CheckID, status, out)
Expand Down Expand Up @@ -632,10 +625,10 @@ func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) {
case 0:
return api.HealthPassing, buf, nil
case 1:
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
return api.HealthWarning, buf, nil
default:
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
return api.HealthCritical, buf, nil
}
}

0 comments on commit aab5d26

Please sign in to comment.