From f3101eb2ec2a0634c20a88cfe38b25835f418a53 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 23 Aug 2017 16:33:53 -0400 Subject: [PATCH] util/log: don't panic Previously, log.outputLogEntry could panic while holding the log mutex. This would deadlock any goroutine that logged while recovering from the panic, which is approximately all of the recover routines. Most annoyingly, the crash reporter would deadlock, swallowing the cause of the panic. Avoid panicking while holding the log mutex and use l.exit instead, which exists for this very purpose. In the process, enforce the invariant that l.mu is held when l.exit is called. (The previous behavior was, in fact, incorrect, as l.flushAll should not be called without holding l.mu.) Also add a Tcl test to ensure this doesn't break in the future. --- .../test_missing_log_output.tcl | 15 ++++++++ pkg/util/log/clog.go | 36 +++++++++++-------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/pkg/cli/interactive_tests/test_missing_log_output.tcl b/pkg/cli/interactive_tests/test_missing_log_output.tcl index 891deed5c291..f1c391c854f6 100644 --- a/pkg/cli/interactive_tests/test_missing_log_output.tcl +++ b/pkg/cli/interactive_tests/test_missing_log_output.tcl @@ -18,6 +18,21 @@ eexpect "255" eexpect ":/# " end_test +start_test "Check that a broken stderr prints a message to the log files." +send "$argv start -s=path=logs/db --insecure --logtostderr --verbosity=1 2>&1 | cat\r" +eexpect "CockroachDB node starting" +system "killall cat" +eexpect ":/# " +system "grep -F 'log: exiting because of error: write /dev/stderr: broken pipe' logs/db/logs/cockroach.log" +end_test + +start_test "Check that a broken log file prints a message to stderr." +send "$argv start -s=path=logs/db --log-dir=/dev/null --insecure --logtostderr\r" +eexpect "log: exiting because of error: log: cannot create log: open" +eexpect "not a directory" +eexpect ":/# " +end_test + start_test "Check that a server started with only in-memory stores and no --log-dir automatically logs to stderr." send "$argv start --insecure --store=type=mem,size=1GiB\r" eexpect "CockroachDB node starting" diff --git a/pkg/util/log/clog.go b/pkg/util/log/clog.go index 224ba1cf329f..f07ac20da3b5 100644 --- a/pkg/util/log/clog.go +++ b/pkg/util/log/clog.go @@ -754,8 +754,8 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string) if err := l.createFile(); err != nil { // Make sure the message appears somewhere. l.outputToStderr(entry, stacks) + l.exitLocked(err) l.mu.Unlock() - l.exit(err) return } } @@ -764,7 +764,9 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string) data := buf.Bytes() if _, err := l.file.Write(data); err != nil { - panic(err) + l.exitLocked(err) + l.mu.Unlock() + return } if l.syncWrites { _ = l.file.Flush() @@ -786,7 +788,7 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string) func (l *loggingT) outputToStderr(entry Entry, stacks []byte) { buf := l.processForStderr(entry, stacks) if _, err := OrigStderr.Write(buf.Bytes()); err != nil { - panic(err) + l.exitLocked(err) } l.putBuffer(buf) } @@ -843,21 +845,27 @@ func getStacks(all bool) []byte { // would make its use clumsier. var logExitFunc func(error) -// exit is called if there is trouble creating or writing log files. -// It flushes the logs and exits the program; there's no point in hanging around. -// l.mu is held. -func (l *loggingT) exit(err error) { - fmt.Fprintf(OrigStderr, "log: exiting because of error: %s\n", err) +// exitLocked is called if there is trouble creating or writing log files, or +// writing to stderr. It flushes the logs and exits the program; there's no +// point in hanging around. l.mu is held. +func (l *loggingT) exitLocked(err error) { + l.mu.AssertHeld() + // Either stderr or our log file is broken. Try writing the error to both + // streams in the hope that one still works or else the user will have no idea + // why we crashed. + for _, w := range []io.Writer{OrigStderr, l.file} { + if w == nil { + continue + } + fmt.Fprintf(w, "log: exiting because of error: %s\n", err) + } // If logExitFunc is set, we do that instead of exiting. if logExitFunc != nil { logExitFunc(err) return } l.flushAll() - l.mu.Lock() - exitFunc := l.exitFunc - l.mu.Unlock() - exitFunc(2) + l.exitFunc(2) } // syncBuffer joins a bufio.Writer to its underlying file, providing access to the @@ -879,13 +887,13 @@ func (sb *syncBuffer) Sync() error { func (sb *syncBuffer) Write(p []byte) (n int, err error) { if sb.nbytes+int64(len(p)) >= atomic.LoadInt64(&LogFileMaxSize) { if err := sb.rotateFile(timeutil.Now()); err != nil { - sb.logger.exit(err) + sb.logger.exitLocked(err) } } n, err = sb.Writer.Write(p) sb.nbytes += int64(n) if err != nil { - sb.logger.exit(err) + sb.logger.exitLocked(err) } return }