diff --git a/agent/checks/check.go b/agent/checks/check.go index 821a03dca34b..9e941d8318e6 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -103,16 +103,13 @@ func (c *CheckMonitor) check() { // Create the command var cmd *osexec.Cmd var err error - var cmdDisplay string if len(c.ScriptArgs) > 0 { - cmdDisplay = fmt.Sprintf("%v", c.ScriptArgs) cmd, err = exec.Subprocess(c.ScriptArgs) } else { - cmdDisplay = c.Script cmd, err = exec.Script(c.Script) } if err != nil { - c.Logger.Printf("[ERR] agent: failed to setup invoke '%s': %s", cmdDisplay, err) + c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error()) return } @@ -129,14 +126,13 @@ func (c *CheckMonitor) check() { outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", output.Size(), output.TotalWritten(), outputStr) } - c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", - c.CheckID, cmdDisplay, outputStr) + c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr) return outputStr } // Start the check if err := cmd.Start(); err != nil { - c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", cmdDisplay, err) + c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error()) return } @@ -154,11 +150,11 @@ func (c *CheckMonitor) check() { select { case <-time.After(timeout): if err := exec.KillCommandSubtree(cmd); err != nil { - c.Logger.Printf("[WARN] Failed to kill check '%s' after timeout: %v", cmdDisplay, err) + c.Logger.Printf("[WARN] agent: Check %q failed to kill after timeout: %s", c.CheckID, err) } msg := fmt.Sprintf("Timed out (%s) running check", timeout.String()) - c.Logger.Printf("[WARN] %s '%s'", msg, cmdDisplay) + c.Logger.Printf("[WARN] agent: Check %q: %s", c.CheckID, msg) outputStr := truncateAndLogOutput() if len(outputStr) > 0 { @@ -178,7 +174,7 @@ func (c *CheckMonitor) check() { // 
Check if the check passed outputStr := truncateAndLogOutput() if err == nil { - c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID) + c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr) return } @@ -189,7 +185,7 @@ func (c *CheckMonitor) check() { if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { code := status.ExitStatus() if code == 1 { - c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID) + c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr) return } @@ -197,7 +193,7 @@ func (c *CheckMonitor) check() { } // Set the health as critical - c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID) + c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr) } @@ -247,7 +243,7 @@ func (c *CheckTTL) run() { for { select { case <-c.timer.C: - c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical", + c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput()) @@ -273,8 +269,7 @@ func (c *CheckTTL) getExpiredOutput() string { // SetStatus is used to update the status of the check, // and to renew the TTL. If expired, TTL is restarted. func (c *CheckTTL) SetStatus(status, output string) { - c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v", - c.CheckID, status) + c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status) c.Notify.UpdateCheck(c.CheckID, status, output) // Store the last output so we can retain it if the TTL expires. 
@@ -358,7 +353,6 @@ func (c *CheckHTTP) Stop() { func (c *CheckHTTP) run() { // Get the randomized initial pause time initialPauseTime := lib.RandomStagger(c.Interval) - c.Logger.Printf("[DEBUG] agent: pausing %v before first HTTP request of %s", initialPauseTime, c.HTTP) next := time.After(initialPauseTime) for { select { @@ -380,7 +374,7 @@ func (c *CheckHTTP) check() { req, err := http.NewRequest(method, c.HTTP, nil) if err != nil { - c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err) + c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error()) return } @@ -405,7 +399,7 @@ func (c *CheckHTTP) check() { resp, err := c.httpClient.Do(req) if err != nil { - c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err) + c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error()) return } @@ -414,7 +408,7 @@ func (c *CheckHTTP) check() { // Read the response into a circular buffer to limit the size output, _ := circbuf.NewBuffer(BufSize) if _, err := io.Copy(output, resp.Body); err != nil { - c.Logger.Printf("[WARN] agent: Check '%v': Get error while reading body: %s", c.CheckID, err) + c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err) } // Format the response body @@ -422,19 +416,19 @@ func (c *CheckHTTP) check() { if resp.StatusCode >= 200 && resp.StatusCode <= 299 { // PASSING (2xx) - c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID) + c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result) } else if resp.StatusCode == 429 { // WARNING // 429 Too Many Requests (RFC 6585) // The user has sent too many requests in a given amount of time. 
- c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID) + c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result) } else { // CRITICAL - c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID) + c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result) } } @@ -496,7 +490,6 @@ func (c *CheckTCP) Stop() { func (c *CheckTCP) run() { // Get the randomized initial pause time initialPauseTime := lib.RandomStagger(c.Interval) - c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP) next := time.After(initialPauseTime) for { select { @@ -513,12 +506,12 @@ func (c *CheckTCP) run() { func (c *CheckTCP) check() { conn, err := c.dialer.Dial(`tcp`, c.TCP) if err != nil { - c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err) + c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err) c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error()) return } conn.Close() - c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID) + c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID) c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP)) } @@ -585,7 +578,7 @@ func (c *CheckDocker) check() { var out string status, b, err := c.doCheck() if err != nil { - c.Logger.Printf("[DEBUG] agent: Check '%s': %s", c.CheckID, err) + c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err) out = err.Error() } else { // out is already limited to CheckBufSize since we're getting a @@ -595,11 +588,11 @@ func (c *CheckDocker) check() { if int(b.TotalWritten()) > len(out) { out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out) } - c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", c.CheckID, c.Script, out) + 
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out) } if status == api.HealthCritical { - c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID) + c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID) } c.Notify.UpdateCheck(c.CheckID, status, out) @@ -632,10 +625,10 @@ func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) { case 0: return api.HealthPassing, buf, nil case 1: - c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode) + c.Logger.Printf("[DEBUG] agent: Check %q failed with exit code: %d", c.CheckID, exitCode) return api.HealthWarning, buf, nil default: - c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode) + c.Logger.Printf("[DEBUG] agent: Check %q failed with exit code: %d", c.CheckID, exitCode) return api.HealthCritical, buf, nil } }