Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve textlogger #362

Merged
merged 14 commits into from
Jan 19, 2023
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: Lint
run: |
docker run --rm -v `pwd`:/go/src/k8s.io/klog -w /go/src/k8s.io/klog \
golangci/golangci-lint:v1.23.8 golangci-lint run --disable-all -v \
golangci/golangci-lint:v1.50.1 golangci-lint run --disable-all -v \
-E govet -E misspell -E gofmt -E ineffassign -E golint
apidiff:
runs-on: ubuntu-latest
Expand Down
48 changes: 11 additions & 37 deletions examples/output_test/output_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,40 +34,13 @@ import (
"k8s.io/klog/v2/textlogger"
)

func init() {
test.InitKlog()
}

// TestKlogOutput tests klog output without a logger.
func TestKlogOutput(t *testing.T) {
test.Output(t, test.OutputConfig{})
}

// TestTextloggerOutput tests the textlogger, directly and as backend.
func TestTextloggerOutput(t *testing.T) {
newLogger := func(out io.Writer, v int, vmodule string) logr.Logger {
config := textlogger.NewConfig(
textlogger.Verbosity(v),
textlogger.Output(out),
)
if err := config.VModule().Set(vmodule); err != nil {
panic(err)
}
return textlogger.NewLogger(config)
}
t.Run("direct", func(t *testing.T) {
test.Output(t, test.OutputConfig{NewLogger: newLogger, SupportsVModule: true})
})
t.Run("klog-backend", func(t *testing.T) {
test.Output(t, test.OutputConfig{NewLogger: newLogger, AsBackend: true})
})
func newLogger(out io.Writer, v int, vmodule string) logr.Logger {
return newZaprLogger(out, v)
}

// TestZaprOutput tests the zapr, directly and as backend.
func TestZaprOutput(t *testing.T) {
newLogger := func(out io.Writer, v int, vmodule string) logr.Logger {
return newZaprLogger(out, v)
}
test.InitKlog(t)
t.Run("direct", func(t *testing.T) {
test.Output(t, test.OutputConfig{NewLogger: newLogger, ExpectedOutputMapping: test.ZaprOutputMappingDirect()})
})
Expand All @@ -76,17 +49,15 @@ func TestZaprOutput(t *testing.T) {
})
}

// TestKlogrOutput tests klogr output via klog.
func TestKlogrOutput(t *testing.T) {
test.Output(t, test.OutputConfig{
NewLogger: func(out io.Writer, v int, vmodule string) logr.Logger {
return klogr.NewWithOptions(klogr.WithFormat(klogr.FormatKlog))
},
})
// Benchmark direct zapr output.
func BenchmarkZaprOutput(b *testing.B) {
test.InitKlog(b)
test.Benchmark(b, test.OutputConfig{NewLogger: newLogger, ExpectedOutputMapping: test.ZaprOutputMappingDirect()})
}

// TestKlogrStackText tests klogr.klogr -> klog -> text logger.
func TestKlogrStackText(t *testing.T) {
test.InitKlog(t)
newLogger := func(out io.Writer, v int, vmodule string) logr.Logger {
// Backend: text output.
config := textlogger.NewConfig(
Expand All @@ -110,6 +81,7 @@ func TestKlogrStackText(t *testing.T) {
// (https://github.com/kubernetes/klog/issues/294) because klogr logging
// records that.
func TestKlogrStackZapr(t *testing.T) {
test.InitKlog(t)
mapping := test.ZaprOutputMappingIndirect()

// klogr doesn't warn about invalid KVs and just inserts
Expand Down Expand Up @@ -150,6 +122,7 @@ func TestKlogrStackZapr(t *testing.T) {

// TestKlogrInternalStackText tests klog.klogr (the simplified version used for contextual logging) -> klog -> text logger.
func TestKlogrInternalStackText(t *testing.T) {
test.InitKlog(t)
newLogger := func(out io.Writer, v int, vmodule string) logr.Logger {
// Backend: text output.
config := textlogger.NewConfig(
Expand All @@ -173,6 +146,7 @@ func TestKlogrInternalStackText(t *testing.T) {
// (https://github.com/kubernetes/klog/issues/294) because klogr logging
// records that.
func TestKlogrInternalStackZapr(t *testing.T) {
test.InitKlog(t)
mapping := test.ZaprOutputMappingIndirect()

// klogr doesn't warn about invalid KVs and just inserts
Expand Down
40 changes: 9 additions & 31 deletions internal/buffer/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,44 +40,22 @@ type Buffer struct {
next *Buffer
}

// Buffers manages the reuse of individual buffer instances. It is thread-safe.
type Buffers struct {
// mu protects the free list. It is separate from the main mutex
// so buffers can be grabbed and printed to without holding the main lock,
// for better parallelization.
mu sync.Mutex

// freeList is a list of byte buffers, maintained under mu.
freeList *Buffer
var buffers = sync.Pool{
Copy link

@serathius serathius Jan 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When comparing performance using benchstat please ensure that p factor is lower than <0.05, so the results are statistically significant. Fact that benchstat returns ~ for time delta doesn't mean that results are equal. It means that there were not enough tries to make any conclusions.

I recommend increase number of tries to when benchmarking to at last 10.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That didn't help. For example:

$ go test -bench=KlogOutput/verbosity_disabled -benchmem -count=10 . >/tmp/run1.log
$ go test -bench=KlogOutput/verbosity_disabled -benchmem -count=10 . >/tmp/run2.log
$ $GOPATH/bin/benchstat /tmp/run1.log /tmp/run2.log 
name                              old time/op    new time/op    delta
KlogOutput/verbosity_disabled-36    14.8ns ± 0%    14.8ns ± 0%   ~     (p=1.000 n=10+10)

name                              old alloc/op   new alloc/op   delta
KlogOutput/verbosity_disabled-36     0.00B          0.00B        ~     (all equal)

name                              old allocs/op  new allocs/op  delta
KlogOutput/verbosity_disabled-36      0.00           0.00        ~     (all equal)

There's no code change at all between the two invocations and the 14.8ns duration is the same with very little variance (0%), but p remains at 1.000.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High p value means that there is no statistically significant difference between both solutions. It makes sense as both options take exactly 14.8ns. https://pkg.go.dev/golang.org/x/perf/cmd/benchstat#hdr-Tips

This is exactly what we would result to be if you would want to prove that solutions have same performance.

Interesting thing is that variance is so low. I would always expect at 1% variation. Not sure why, maybe problem with benchmark?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I am confused. Earlier you said:

Fact that benchstat returns ~ for time delta doesn't mean that results are equal. It means that there were not enough tries to make any conclusions.

But in this example it means exactly that: results are equal.

Copy link

@serathius serathius Jan 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for confusion, ~ just means it could not determine whether there is a statistically significant improvement difference between two runs. This happens when p is above 0.05 . This can mean either:

  • There is a change, but we didn't get enough runs to be sure. For example we got p equal 0.5, which means we are 50% sure that there is an improvement.
  • Results are equal.

In first case increasing number of runs should result in p decreasing. In that case we should increase the number of runs until we get p below 0.05.

As in example below. We can see that new time is on average 0.06µs faster. However we also see that this difference is around the about the result variance ± 6%. So if we take best results from old time, they are still better then new time. In such situation we need to use statistical analysis to confirm the result. Thankfully benchstat already gives us the p value equal 0.246 which means that there is 75.4% (1-0.246) chance that there is an improvement. This however is usually not enough and we want to have at least 95% certainty (p value below 0.05). In this case we should increasing number of runs should help

name              old time/op    new time/op    delta
Header-36         1.68µs ± 7%    1.62µs ± 6%    ~     (p=0.246 n=5+5)

In second case p will never decrease no matter how many runs we do. Example:

name                             old time/op    new time/op    delta
KlogOutput/verbosity_disabled-36 14.8ns ± 0%    14.8ns ± 0%    ~ (p=1.000 n=10+10)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In second case results are suspicious. There are many factors that should cause execution time to vary, like cpu temperature, boost, background processes etc. However the point stand that increasing number of runs doesn't help. I would recommend double checking if benchmark is correctly implemented.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are many factors that should cause execution time to vary, like cpu temperature, boost, background processes etc.

I am running this on a high-quality desktop with nothing besides the benchmark running. I think it is valid that the results are stable.

New: func() interface{} {
return new(Buffer)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose the sync.Pool is a bit slower in terms of ns/op if you run the benchmark. Would that impact the klog behavior in any way ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The microbenchmarks which use this buffer (header formatting) didn't show any measurable difference (see commit message).

For a full before/after comparison the bugfix in textlogger would have to be pulled into a separate commit. I've not done that.

},
}

// GetBuffer returns a new, ready-to-use buffer.
func (bl *Buffers) GetBuffer() *Buffer {
bl.mu.Lock()
b := bl.freeList
if b != nil {
bl.freeList = b.next
}
bl.mu.Unlock()
if b == nil {
b = new(Buffer)
} else {
b.next = nil
b.Reset()
}
func GetBuffer() *Buffer {
b := buffers.Get().(*Buffer)
b.Reset()
return b
}

// PutBuffer returns a buffer to the free list.
func (bl *Buffers) PutBuffer(b *Buffer) {
if b.Len() >= 256 {
// Let big buffers die a natural death.
return
}
bl.mu.Lock()
b.next = bl.freeList
bl.freeList = b
bl.mu.Unlock()
func PutBuffer(b *Buffer) {
buffers.Put(b)
}

// Some custom tiny helper functions to print the log header efficiently.
Expand Down
179 changes: 123 additions & 56 deletions internal/serialize/keyvalues.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import (
"github.com/go-logr/logr"
)

type textWriter interface {
WriteText(*bytes.Buffer)
}

// WithValues implements LogSink.WithValues. The old key/value pairs are
// assumed to be well-formed, the new ones are checked and padded if
// necessary. It returns a new slice.
Expand Down Expand Up @@ -91,6 +95,51 @@ func MergeKVs(first, second []interface{}) []interface{} {
return merged
}

// MergeKVsInto is a variant of MergeKVs which directly formats the key/value
// pairs into a buffer.
func MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) {
if len(first) == 0 && len(second) == 0 {
// Nothing to do at all.
return
}

if len(first) == 0 && len(second)%2 == 0 {
// Nothing to be overridden, second slice is well-formed
// and can be used directly.
for i := 0; i < len(second); i += 2 {
KVFormat(b, second[i], second[i+1])
}
return
}

// Determine which keys are in the second slice so that we can skip
// them when iterating over the first one. The code intentionally
// favors performance over completeness: we assume that keys are string
// constants and thus compare equal when the string values are equal. A
// string constant being overridden by, for example, a fmt.Stringer is
// not handled.
overrides := map[interface{}]bool{}
for i := 0; i < len(second); i += 2 {
overrides[second[i]] = true
}
for i := 0; i < len(first); i += 2 {
key := first[i]
if overrides[key] {
continue
}
KVFormat(b, key, first[i+1])
}
// Round down.
l := len(second)
l = l / 2 * 2
for i := 1; i < l; i += 2 {
KVFormat(b, second[i-1], second[i])
}
if len(second)%2 == 1 {
KVFormat(b, second[len(second)-1], missingValue)
}
}

const missingValue = "(MISSING)"

// KVListFormat serializes all key/value pairs into the provided buffer.
Expand All @@ -104,66 +153,74 @@ func KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) {
} else {
v = missingValue
}
b.WriteByte(' ')
// Keys are assumed to be well-formed according to
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
// for the sake of performance. Keys with spaces,
// special characters, etc. will break parsing.
if sK, ok := k.(string); ok {
// Avoid one allocation when the key is a string, which
// normally it should be.
b.WriteString(sK)
} else {
b.WriteString(fmt.Sprintf("%s", k))
}
KVFormat(b, k, v)
}
}

// KVFormat serializes one key/value pair into the provided buffer.
// A space gets inserted before the pair.
func KVFormat(b *bytes.Buffer, k, v interface{}) {
b.WriteByte(' ')
// Keys are assumed to be well-formed according to
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
// for the sake of performance. Keys with spaces,
// special characters, etc. will break parsing.
if sK, ok := k.(string); ok {
// Avoid one allocation when the key is a string, which
// normally it should be.
b.WriteString(sK)
} else {
b.WriteString(fmt.Sprintf("%s", k))
}

// The type checks are sorted so that more frequently used ones
// come first because that is then faster in the common
// cases. In Kubernetes, ObjectRef (a Stringer) is more common
// than plain strings
// (https://github.com/kubernetes/kubernetes/pull/106594#issuecomment-975526235).
switch v := v.(type) {
case fmt.Stringer:
writeStringValue(b, true, StringerToString(v))
// The type checks are sorted so that more frequently used ones
// come first because that is then faster in the common
// cases. In Kubernetes, ObjectRef (a Stringer) is more common
// than plain strings
// (https://github.com/kubernetes/kubernetes/pull/106594#issuecomment-975526235).
switch v := v.(type) {
case textWriter:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about benchmarking as above.

writeTextWriterValue(b, v)
case fmt.Stringer:
writeStringValue(b, true, StringerToString(v))
case string:
writeStringValue(b, true, v)
case error:
writeStringValue(b, true, ErrorToString(v))
case logr.Marshaler:
value := MarshalerToValue(v)
// A marshaler that returns a string is useful for
// delayed formatting of complex values. We treat this
// case like a normal string. This is useful for
// multi-line support.
//
// We could do this by recursively formatting a value,
// but that comes with the risk of infinite recursion
// if a marshaler returns itself. Instead we call it
// only once and rely on it returning the intended
// value directly.
switch value := value.(type) {
case string:
writeStringValue(b, true, v)
case error:
writeStringValue(b, true, ErrorToString(v))
case logr.Marshaler:
value := MarshalerToValue(v)
// A marshaler that returns a string is useful for
// delayed formatting of complex values. We treat this
// case like a normal string. This is useful for
// multi-line support.
//
// We could do this by recursively formatting a value,
// but that comes with the risk of infinite recursion
// if a marshaler returns itself. Instead we call it
// only once and rely on it returning the intended
// value directly.
switch value := value.(type) {
case string:
writeStringValue(b, true, value)
default:
writeStringValue(b, false, fmt.Sprintf("%+v", value))
}
case []byte:
// In https://github.com/kubernetes/klog/pull/237 it was decided
// to format byte slices with "%+q". The advantages of that are:
// - readable output if the bytes happen to be printable
// - non-printable bytes get represented as unicode escape
// sequences (\uxxxx)
//
// The downsides are that we cannot use the faster
// strconv.Quote here and that multi-line output is not
// supported. If developers know that a byte array is
// printable and they want multi-line output, they can
// convert the value to string before logging it.
b.WriteByte('=')
b.WriteString(fmt.Sprintf("%+q", v))
writeStringValue(b, true, value)
default:
writeStringValue(b, false, fmt.Sprintf("%+v", v))
writeStringValue(b, false, fmt.Sprintf("%+v", value))
}
case []byte:
// In https://github.com/kubernetes/klog/pull/237 it was decided
// to format byte slices with "%+q". The advantages of that are:
// - readable output if the bytes happen to be printable
// - non-printable bytes get represented as unicode escape
// sequences (\uxxxx)
//
// The downsides are that we cannot use the faster
// strconv.Quote here and that multi-line output is not
// supported. If developers know that a byte array is
// printable and they want multi-line output, they can
// convert the value to string before logging it.
b.WriteByte('=')
b.WriteString(fmt.Sprintf("%+q", v))
default:
writeStringValue(b, false, fmt.Sprintf("%+v", v))
}
}

Expand Down Expand Up @@ -203,6 +260,16 @@ func ErrorToString(err error) (ret string) {
return
}

func writeTextWriterValue(b *bytes.Buffer, v textWriter) {
b.WriteRune('=')
defer func() {
if err := recover(); err != nil {
fmt.Fprintf(b, `"<panic: %s>"`, err)
}
}()
v.WriteText(b)
}

func writeStringValue(b *bytes.Buffer, quote bool, v string) {
data := []byte(v)
index := bytes.IndexByte(data, '\n')
Expand Down
Loading