-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
improve textlogger #362
improve textlogger #362
Changes from all commits
f6e8786
e23e29e
50cdc4a
676a553
c57c59c
723e7f4
8c60010
4aa7aa6
718ea45
1c00d46
ab80bfc
3d96ccc
64ee6cf
a9dc48e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,44 +40,22 @@ type Buffer struct { | |
next *Buffer | ||
} | ||
|
||
// Buffers manages the reuse of individual buffer instances. It is thread-safe. | ||
type Buffers struct { | ||
// mu protects the free list. It is separate from the main mutex | ||
// so buffers can be grabbed and printed to without holding the main lock, | ||
// for better parallelization. | ||
mu sync.Mutex | ||
|
||
// freeList is a list of byte buffers, maintained under mu. | ||
freeList *Buffer | ||
var buffers = sync.Pool{ | ||
New: func() interface{} { | ||
return new(Buffer) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The microbenchmarks which use this buffer (header formatting) didn't show any measurable difference (see commit message). For a full before/after comparison the bugfix in textlogger would have to be pulled into a separate commit. I've not done that. |
||
}, | ||
} | ||
|
||
// GetBuffer returns a new, ready-to-use buffer. | ||
func (bl *Buffers) GetBuffer() *Buffer { | ||
bl.mu.Lock() | ||
b := bl.freeList | ||
if b != nil { | ||
bl.freeList = b.next | ||
} | ||
bl.mu.Unlock() | ||
if b == nil { | ||
b = new(Buffer) | ||
} else { | ||
b.next = nil | ||
b.Reset() | ||
} | ||
func GetBuffer() *Buffer { | ||
b := buffers.Get().(*Buffer) | ||
b.Reset() | ||
return b | ||
} | ||
|
||
// PutBuffer returns a buffer to the free list. | ||
func (bl *Buffers) PutBuffer(b *Buffer) { | ||
if b.Len() >= 256 { | ||
// Let big buffers die a natural death. | ||
return | ||
} | ||
bl.mu.Lock() | ||
b.next = bl.freeList | ||
bl.freeList = b | ||
bl.mu.Unlock() | ||
func PutBuffer(b *Buffer) { | ||
buffers.Put(b) | ||
} | ||
|
||
// Some custom tiny helper functions to print the log header efficiently. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,10 @@ import ( | |
"github.com/go-logr/logr" | ||
) | ||
|
||
type textWriter interface { | ||
WriteText(*bytes.Buffer) | ||
} | ||
|
||
// WithValues implements LogSink.WithValues. The old key/value pairs are | ||
// assumed to be well-formed, the new ones are checked and padded if | ||
// necessary. It returns a new slice. | ||
|
@@ -91,6 +95,51 @@ func MergeKVs(first, second []interface{}) []interface{} { | |
return merged | ||
} | ||
|
||
// MergeAndFormatKVs is a variant of MergeKVs which directly formats the key/value | ||
// pairs into a buffer. | ||
func MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) { | ||
if len(first) == 0 && len(second) == 0 { | ||
// Nothing to do at all. | ||
return | ||
} | ||
|
||
if len(first) == 0 && len(second)%2 == 0 { | ||
// Nothing to be overridden, second slice is well-formed | ||
// and can be used directly. | ||
for i := 0; i < len(second); i += 2 { | ||
KVFormat(b, second[i], second[i+1]) | ||
} | ||
return | ||
} | ||
|
||
// Determine which keys are in the second slice so that we can skip | ||
// them when iterating over the first one. The code intentionally | ||
// favors performance over completeness: we assume that keys are string | ||
// constants and thus compare equal when the string values are equal. A | ||
// string constant being overridden by, for example, a fmt.Stringer is | ||
// not handled. | ||
overrides := map[interface{}]bool{} | ||
for i := 0; i < len(second); i += 2 { | ||
overrides[second[i]] = true | ||
} | ||
for i := 0; i < len(first); i += 2 { | ||
key := first[i] | ||
if overrides[key] { | ||
continue | ||
} | ||
KVFormat(b, key, first[i+1]) | ||
} | ||
// Round down. | ||
l := len(second) | ||
l = l / 2 * 2 | ||
for i := 1; i < l; i += 2 { | ||
KVFormat(b, second[i-1], second[i]) | ||
} | ||
if len(second)%2 == 1 { | ||
KVFormat(b, second[len(second)-1], missingValue) | ||
} | ||
} | ||
|
||
const missingValue = "(MISSING)" | ||
|
||
// KVListFormat serializes all key/value pairs into the provided buffer. | ||
|
@@ -104,66 +153,74 @@ func KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) { | |
} else { | ||
v = missingValue | ||
} | ||
b.WriteByte(' ') | ||
// Keys are assumed to be well-formed according to | ||
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments | ||
// for the sake of performance. Keys with spaces, | ||
// special characters, etc. will break parsing. | ||
if sK, ok := k.(string); ok { | ||
// Avoid one allocation when the key is a string, which | ||
// normally it should be. | ||
b.WriteString(sK) | ||
} else { | ||
b.WriteString(fmt.Sprintf("%s", k)) | ||
} | ||
KVFormat(b, k, v) | ||
} | ||
} | ||
|
||
// KVFormat serializes one key/value pair into the provided buffer. | ||
// A space gets inserted before the pair. | ||
func KVFormat(b *bytes.Buffer, k, v interface{}) { | ||
b.WriteByte(' ') | ||
// Keys are assumed to be well-formed according to | ||
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments | ||
// for the sake of performance. Keys with spaces, | ||
// special characters, etc. will break parsing. | ||
if sK, ok := k.(string); ok { | ||
// Avoid one allocation when the key is a string, which | ||
// normally it should be. | ||
b.WriteString(sK) | ||
} else { | ||
b.WriteString(fmt.Sprintf("%s", k)) | ||
} | ||
|
||
// The type checks are sorted so that more frequently used ones | ||
// come first because that is then faster in the common | ||
// cases. In Kubernetes, ObjectRef (a Stringer) is more common | ||
// than plain strings | ||
// (https://github.com/kubernetes/kubernetes/pull/106594#issuecomment-975526235). | ||
switch v := v.(type) { | ||
case fmt.Stringer: | ||
writeStringValue(b, true, StringerToString(v)) | ||
// The type checks are sorted so that more frequently used ones | ||
// come first because that is then faster in the common | ||
// cases. In Kubernetes, ObjectRef (a Stringer) is more common | ||
// than plain strings | ||
// (https://github.com/kubernetes/kubernetes/pull/106594#issuecomment-975526235). | ||
switch v := v.(type) { | ||
case textWriter: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment about benchmarking as above. |
||
writeTextWriterValue(b, v) | ||
case fmt.Stringer: | ||
writeStringValue(b, true, StringerToString(v)) | ||
case string: | ||
writeStringValue(b, true, v) | ||
case error: | ||
writeStringValue(b, true, ErrorToString(v)) | ||
case logr.Marshaler: | ||
value := MarshalerToValue(v) | ||
// A marshaler that returns a string is useful for | ||
// delayed formatting of complex values. We treat this | ||
// case like a normal string. This is useful for | ||
// multi-line support. | ||
// | ||
// We could do this by recursively formatting a value, | ||
// but that comes with the risk of infinite recursion | ||
// if a marshaler returns itself. Instead we call it | ||
// only once and rely on it returning the intended | ||
// value directly. | ||
switch value := value.(type) { | ||
case string: | ||
writeStringValue(b, true, v) | ||
case error: | ||
writeStringValue(b, true, ErrorToString(v)) | ||
case logr.Marshaler: | ||
value := MarshalerToValue(v) | ||
// A marshaler that returns a string is useful for | ||
// delayed formatting of complex values. We treat this | ||
// case like a normal string. This is useful for | ||
// multi-line support. | ||
// | ||
// We could do this by recursively formatting a value, | ||
// but that comes with the risk of infinite recursion | ||
// if a marshaler returns itself. Instead we call it | ||
// only once and rely on it returning the intended | ||
// value directly. | ||
switch value := value.(type) { | ||
case string: | ||
writeStringValue(b, true, value) | ||
default: | ||
writeStringValue(b, false, fmt.Sprintf("%+v", value)) | ||
} | ||
case []byte: | ||
// In https://github.com/kubernetes/klog/pull/237 it was decided | ||
// to format byte slices with "%+q". The advantages of that are: | ||
// - readable output if the bytes happen to be printable | ||
// - non-printable bytes get represented as unicode escape | ||
// sequences (\uxxxx) | ||
// | ||
// The downsides are that we cannot use the faster | ||
// strconv.Quote here and that multi-line output is not | ||
// supported. If developers know that a byte array is | ||
// printable and they want multi-line output, they can | ||
// convert the value to string before logging it. | ||
b.WriteByte('=') | ||
b.WriteString(fmt.Sprintf("%+q", v)) | ||
writeStringValue(b, true, value) | ||
default: | ||
writeStringValue(b, false, fmt.Sprintf("%+v", v)) | ||
writeStringValue(b, false, fmt.Sprintf("%+v", value)) | ||
} | ||
case []byte: | ||
// In https://github.com/kubernetes/klog/pull/237 it was decided | ||
// to format byte slices with "%+q". The advantages of that are: | ||
// - readable output if the bytes happen to be printable | ||
// - non-printable bytes get represented as unicode escape | ||
// sequences (\uxxxx) | ||
// | ||
// The downsides are that we cannot use the faster | ||
// strconv.Quote here and that multi-line output is not | ||
// supported. If developers know that a byte array is | ||
// printable and they want multi-line output, they can | ||
// convert the value to string before logging it. | ||
b.WriteByte('=') | ||
b.WriteString(fmt.Sprintf("%+q", v)) | ||
default: | ||
writeStringValue(b, false, fmt.Sprintf("%+v", v)) | ||
} | ||
} | ||
|
||
|
@@ -203,6 +260,16 @@ func ErrorToString(err error) (ret string) { | |
return | ||
} | ||
|
||
func writeTextWriterValue(b *bytes.Buffer, v textWriter) { | ||
b.WriteRune('=') | ||
defer func() { | ||
if err := recover(); err != nil { | ||
fmt.Fprintf(b, `"<panic: %s>"`, err) | ||
} | ||
}() | ||
v.WriteText(b) | ||
} | ||
|
||
func writeStringValue(b *bytes.Buffer, quote bool, v string) { | ||
data := []byte(v) | ||
index := bytes.IndexByte(data, '\n') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When comparing performance using benchstat, please ensure that the `p` value is lower than 0.05, so that the results are statistically significant. The fact that benchstat returns `~` for the time delta doesn't mean that the results are equal. It means that there were not enough runs to draw any conclusions.
I recommend increasing the number of runs when benchmarking to at least 10.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That didn't help. For example:
There's no code change at all between the two invocations and the `14.8ns` duration is the same with very little variance (0%), but p remains at 1.000.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A high p value means that there is no statistically significant difference between the two solutions. That makes sense, as both options take exactly `14.8ns`. See https://pkg.go.dev/golang.org/x/perf/cmd/benchstat#hdr-Tips
This is exactly the result we would expect if you wanted to show that the two solutions have the same performance.
The interesting thing is that the variance is so low. I would always expect at least 1% variation. Not sure why; maybe there is a problem with the benchmark?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now I am confused. Earlier you said:
But in this example it means exactly that: results are equal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the confusion. `~` just means that benchstat could not determine whether there is a statistically significant difference between the two runs. This happens when `p` is above 0.05. This can mean either:
1. There may be a difference, but there were not enough runs to confirm it, e.g. `p` equal to 0.5, which means we are 50% sure that there is an improvement.
2. There is no difference between the two runs.
In the first case, increasing the number of runs should result in `p` decreasing. In that case we should increase the number of runs until we get `p` below 0.05, as in the example below. We can see that the new time is on average 0.06µs faster. However, we also see that this difference is about the same size as the result variance, `± 6%`. So if we take the best results from the old time, they are still better than the new time. In such a situation we need to use statistical analysis to confirm the result. Thankfully, benchstat already gives us the `p` value, equal to 0.246, which means that there is a 75.4% (1-0.246) chance that there is an improvement. This, however, is usually not enough, and we want to have at least 95% certainty (p value below 0.05). In this case, increasing the number of runs should help.
In the second case, p will never decrease no matter how many runs we do. Example:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the second case, the results are suspicious. There are many factors that should cause execution time to vary, like CPU temperature, boost, background processes, etc. However, the point stands that increasing the number of runs doesn't help. I would recommend double-checking whether the benchmark is correctly implemented.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am running this on a high-quality desktop with nothing besides the benchmark running. I think it is valid that the results are stable.