From 4715df0a0aebab67872022f751827de234d1ec71 Mon Sep 17 00:00:00 2001 From: codebien <2103732+codebien@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:52:57 +0200 Subject: [PATCH 1/3] cloudv2: Higher resolution for Histogram --- output/cloud/expv2/hdr.go | 43 ++-- output/cloud/expv2/hdr_test.go | 201 ++++++++++++++---- .../expv2/integration/testdata/metricset.json | 7 +- 3 files changed, 186 insertions(+), 65 deletions(-) diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go index d7d351df579..9493db1bd26 100644 --- a/output/cloud/expv2/hdr.go +++ b/output/cloud/expv2/hdr.go @@ -9,6 +9,11 @@ import ( ) const ( + // defaultMinimumResolution is the default resolution used by histogram. + // It allows to have a higher granularity compared to the basic 1.0 value, + // supporting floating points up to 3 digits. + defaultMinimumResolution = .001 + // lowestTrackable represents the minimum value that the histogram tracks. // Essentially, it excludes negative numbers. // Most of metrics tracked by histograms are durations @@ -17,12 +22,6 @@ const ( // In the future, we may expand and include them, // probably after https://github.com/grafana/k6/issues/763. lowestTrackable = 0 - - // highestTrackable represents the maximum - // value that the histogram is able to track with high accuracy (0.1% of error). - // It should be a high enough - // and rationale value for the k6 context; 2^30 = 1_073_741_824 - highestTrackable = 1 << 30 ) // histogram represents a distribution @@ -61,13 +60,18 @@ type histogram struct { // Count is counts the amount of observed values. Count uint32 + + // MinimumResolution represents resolution used by Histogram. + // In principle, it is a multiplier factor for the tracked values. 
+ MinimumResolution float64 } func newHistogram() *histogram { return &histogram{ - Buckets: make(map[uint32]uint32), - Max: -math.MaxFloat64, - Min: math.MaxFloat64, + MinimumResolution: defaultMinimumResolution, + Buckets: make(map[uint32]uint32), + Max: -math.MaxFloat64, + Min: math.MaxFloat64, } } @@ -85,7 +89,9 @@ func (h *histogram) addToBucket(v float64) { h.Count++ h.Sum += v - if v > highestTrackable { + v /= h.MinimumResolution + + if v > math.MaxInt64 { h.ExtraHighBucket++ return } @@ -151,6 +157,9 @@ func histogramAsProto(h *histogram, time int64) *pbcloud.TrendHdrValue { if h.ExtraHighBucket > 0 { hval.ExtraHighValuesCounter = &h.ExtraHighBucket } + // We don't expect to change the minimum resolution at runtime + // so it is safe use directly a pointer without creating a copy + hval.MinResolution = &h.MinimumResolution return hval } @@ -164,7 +173,7 @@ func resolveBucketIndex(val float64) uint32 { // We upscale to the next integer to ensure that each sample falls // within a specific bucket, even when the value is fractional. // This avoids under-representing the distribution in the histogram. - upscaled := uint32(math.Ceil(val)) + upscaled := uint64(math.Ceil(val)) // In histograms, bucket boundaries are usually defined as multiples of powers of 2, // allowing for efficient computation of bucket indexes. @@ -181,11 +190,11 @@ func resolveBucketIndex(val float64) uint32 { // 2^10 = 1024 ~ 1000 = 10^3 // f(x) = 3*x + 1 - empiric formula that works for us // since f(2)=7 and f(3)=10 - const k = uint32(7) + const k = uint64(7) // 256 = 1 << (k+1) if upscaled < 256 { - return upscaled + return uint32(upscaled) } // `nkdiff` helps us find the right bucket for `upscaled`. 
It does so by determining the @@ -205,8 +214,12 @@ func resolveBucketIndex(val float64) uint32 { // = (n-k+1)<<k + u>>(n-k) - (1<<k) = // = (n-k)<<k + u>>(n-k) // - nkdiff := uint32(bits.Len32(upscaled>>k) - 1) // msb index - return (nkdiff << k) + (upscaled >> nkdiff) + nkdiff := uint64(bits.Len64(upscaled>>k)) - 1 // msb index + + // We cast safely downscaling because we don't expect we may hit the uint32 limit + // with the bucket index. The bucket represented from the index as MaxUint32 + // would be a very huge number bigger than the trackable limits. + return uint32((nkdiff << k) + (upscaled >> nkdiff)) } // Add implements the metricValue interface. diff --git a/output/cloud/expv2/hdr_test.go b/output/cloud/expv2/hdr_test.go index 182a1de031b..129a7a24783 100644 --- a/output/cloud/expv2/hdr_test.go +++ b/output/cloud/expv2/hdr_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.k6.io/k6/output/cloud/expv2/pbcloud" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -33,12 +34,24 @@ func TestResolveBucketIndex(t *testing.T) { {in: 282.29, exp: 269}, {in: 1029, exp: 512}, {in: 39751, exp: 1179}, + {in: 100000, exp: 1347}, + {in: 182272, exp: 1458}, + {in: 183000, exp: 1458}, + {in: 184000, exp: 1459}, + {in: 200000, exp: 1475}, + + {in: 1 << 20, exp: 1792}, {in: (1 << 30) - 1, exp: 3071}, - {in: (1 << 30), exp: 3072}, - {in: math.MaxInt32, exp: 3199}, + {in: 1 << 30, exp: 3072}, + {in: 1 << 40, exp: 4352}, + {in: 1 << 62, exp: 7168}, + + {in: math.MaxInt32, exp: 3199}, // 2B + {in: math.MaxUint32, exp: 3327}, // 4B + {in: math.MaxInt64, exp: 7296}, // Huge number // 9.22...e+18 } for _, tc := range tests { - assert.Equal(t, tc.exp, resolveBucketIndex(tc.in), tc.in) + assert.Equal(t, int(tc.exp), int(resolveBucketIndex(tc.in)), tc.in) } } @@ -116,9 +129,11 @@ func TestHistogramAddWithSimpleValues(t *testing.T) { t.Run(strconv.Itoa(i), func(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 for 
_, v := range tc.vals { h.Add(v) } + tc.exp.MinimumResolution = 1.0 assert.Equal(t, &tc.exp, h) }) } @@ -128,18 +143,20 @@ func TestHistogramAddWithUntrackables(t *testing.T) { t.Parallel() h := newHistogram() - for _, v := range []float64{5, -3.14, 2 * 1e9, 1} { + h.MinimumResolution = 1.0 + for _, v := range []float64{5, -3.14, 1<<62 + 1, 1} { h.Add(v) } exp := &histogram{ - Buckets: map[uint32]uint32{1: 1, 5: 1}, - ExtraLowBucket: 1, - ExtraHighBucket: 1, - Max: 2 * 1e9, - Min: -3.14, - Sum: 2*1e9 + 5 + 1 - 3.14, - Count: 4, + Buckets: map[uint32]uint32{1: 1, 5: 1}, + ExtraLowBucket: 1, + ExtraHighBucket: 1, + Max: 1 << 62, + Min: -3.14, + Sum: 1<<62 + 1 + 5 + 1 - 3.14, + Count: 4, + MinimumResolution: 1.0, } assert.Equal(t, exp, h) } @@ -148,6 +165,7 @@ func TestHistogramAddWithMultipleOccurances(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 for _, v := range []float64{51.8, 103.6, 103.6, 103.6, 103.6} { h.Add(v) } @@ -161,6 +179,7 @@ func TestHistogramAddWithMultipleOccurances(t *testing.T) { Sum: 466.20000000000005, Count: 5, } + exp.MinimumResolution = 1.0 assert.Equal(t, exp, h) } @@ -168,16 +187,18 @@ func TestHistogramAddWithNegativeNum(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 h.Add(-2.42314) exp := &histogram{ - Max: -2.42314, - Min: -2.42314, - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 1, - ExtraHighBucket: 0, - Sum: -2.42314, - Count: 1, + Max: -2.42314, + Min: -2.42314, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 1, + ExtraHighBucket: 0, + Sum: -2.42314, + Count: 1, + MinimumResolution: 1.0, } assert.Equal(t, exp, h) } @@ -185,33 +206,57 @@ func TestHistogramAddWithNegativeNum(t *testing.T) { func TestHistogramAddWithMultipleNegativeNums(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 for _, v := range []float64{-0.001, -0.001, -0.001} { h.Add(v) } exp := &histogram{ - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 3, - ExtraHighBucket: 0, - 
Max: -0.001, - Min: -0.001, - Sum: -0.003, - Count: 3, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 3, + ExtraHighBucket: 0, + Max: -0.001, + Min: -0.001, + Sum: -0.003, + Count: 3, + MinimumResolution: 1.0, + } + h.MinimumResolution = 1.0 + assert.Equal(t, exp, h) +} + +func TestHistogramAddWithZeroToOneValues(t *testing.T) { + t.Parallel() + h := newHistogram() + for _, v := range []float64{0.000052, 0.002115, 0.012013, 0.05017, 0.250, 0.54, 0.541, 0.803} { + h.Add(v) + } + + exp := &histogram{ + Buckets: map[uint32]uint32{1: 1, 3: 1, 13: 1, 51: 1, 250: 1, 391: 2, 456: 1}, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: .803, + Min: .000052, + Sum: 2.19835, + Count: 8, + MinimumResolution: .001, } assert.Equal(t, exp, h) } -func TestNewHistoramWithNoVals(t *testing.T) { +func TestNewHistoram(t *testing.T) { t.Parallel() h := newHistogram() exp := &histogram{ - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 0, - ExtraHighBucket: 0, - Max: -math.MaxFloat64, - Min: math.MaxFloat64, - Sum: 0, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: -math.MaxFloat64, + Min: math.MaxFloat64, + Sum: 0, + MinimumResolution: 0.001, } assert.Equal(t, exp, h) } @@ -224,20 +269,21 @@ func TestHistogramAsProto(t *testing.T) { } cases := []struct { - name string - vals []float64 - exp *pbcloud.TrendHdrValue + name string + vals []float64 + minResolution float64 + exp *pbcloud.TrendHdrValue }{ { - name: "empty histogram", + name: "EmptyHistogram", exp: &pbcloud.TrendHdrValue{ MaxValue: -math.MaxFloat64, MinValue: math.MaxFloat64, }, }, { - name: "not trackable values", - vals: []float64{-0.23, 1<<30 + 1}, + name: "UntrackableValues", + vals: []float64{-0.23, 1<<62 + 1}, exp: &pbcloud.TrendHdrValue{ ExtraLowValuesCounter: uint32ptr(1), ExtraHighValuesCounter: uint32ptr(1), @@ -245,12 +291,12 @@ func TestHistogramAsProto(t *testing.T) { Spans: nil, Count: 2, MinValue: -0.23, - MaxValue: 1<<30 + 1, - Sum: (1 << 30) + 1 - 0.23, + MaxValue: 1<<62 + 
1, + Sum: (1 << 62) + 1 - 0.23, }, }, { - name: "normal values", + name: "SimpleValues", vals: []float64{7, 8, 9, 11, 12, 11.5, 10.5}, exp: &pbcloud.TrendHdrValue{ Count: 7, @@ -267,7 +313,7 @@ func TestHistogramAsProto(t *testing.T) { }, }, { - name: "with Zero-point values", + name: "WithZeroPointValues", vals: []float64{2, 0.01, 3}, exp: &pbcloud.TrendHdrValue{ Count: 3, @@ -286,7 +332,7 @@ func TestHistogramAsProto(t *testing.T) { }, }, { - name: "a basic case", + name: "VeryBasic", vals: []float64{2, 1.1, 3}, exp: &pbcloud.TrendHdrValue{ Count: 3, @@ -305,7 +351,7 @@ func TestHistogramAsProto(t *testing.T) { }, }, { - name: "longer sequence", + name: "LongerSequence", vals: []float64{ 2275, 52.25, 268.85, 383.47, 18.49, 163.85, 4105, 835.27, 52, 18.28, 238.44, 39751, 18.86, @@ -343,18 +389,79 @@ func TestHistogramAsProto(t *testing.T) { Sum: 56153.280000000006, }, }, + { + name: "Unrealistic", + vals: []float64{math.MaxUint32}, + exp: &pbcloud.TrendHdrValue{ + Count: 1, + ExtraLowValuesCounter: nil, + ExtraHighValuesCounter: nil, + Counters: []uint32{1}, + Spans: []*pbcloud.BucketSpan{ + { + Offset: 3327, + Length: 1, + }, + }, + MinValue: math.MaxUint32, + MaxValue: math.MaxUint32, + Sum: math.MaxUint32, + }, + }, + { + name: "DefaultMinimumResolution", + vals: []float64{200, 100, 200.1}, + minResolution: .001, + exp: &pbcloud.TrendHdrValue{ + Count: 3, + ExtraLowValuesCounter: nil, + ExtraHighValuesCounter: nil, + MinResolution: float64ptr(defaultMinimumResolution), + Counters: []uint32{1, 2}, + Spans: []*pbcloud.BucketSpan{ + { + Offset: 1347, + Length: 1, + }, + { + Offset: 127, + Length: 1, + }, + }, + MinValue: 100, + MaxValue: 200.1, + Sum: 500.1, + }, + }, } - for i, tc := range cases { + for _, tc := range cases { tc := tc - t.Run(strconv.Itoa(i), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() + h := newHistogram() + // TODO: refactor + // An hack for preserving as the default for the tests the old value 1.0 + if 
tc.minResolution == 0 { + tc.minResolution = 1.0 + tc.exp.MinResolution = float64ptr(1.0) + } + h.MinimumResolution = tc.minResolution + for _, v := range tc.vals { h.Add(v) } tc.exp.Time = &timestamppb.Timestamp{Seconds: 1} - assert.Equal(t, tc.exp, histogramAsProto(h, time.Unix(1, 0).UnixNano()), tc.name) + hproto := histogramAsProto(h, time.Unix(1, 0).UnixNano()) + require.Equal(t, tc.exp.Count, hproto.Count) + require.Equal(t, tc.exp.Counters, hproto.Counters) + require.Equal(t, len(tc.exp.Spans), len(hproto.Spans)) + assert.Equal(t, tc.exp, hproto, tc.name) }) } } + +func float64ptr(n float64) *float64 { + return &n +} diff --git a/output/cloud/expv2/integration/testdata/metricset.json b/output/cloud/expv2/integration/testdata/metricset.json index 1bd42ebbdb4..eb283095caa 100644 --- a/output/cloud/expv2/integration/testdata/metricset.json +++ b/output/cloud/expv2/integration/testdata/metricset.json @@ -94,13 +94,14 @@ ], "spans": [ { - "offset": 6, + "offset": 827, "length": 1 } ], "maxValue": 6, "minValue": 6, - "sum": 6 + "sum": 6, + "minResolution": 0.001 } ] } @@ -108,4 +109,4 @@ ] } ] -} \ No newline at end of file +} From a6f757d886c3af74ab2697fcf41194ccc4696b60 Mon Sep 17 00:00:00 2001 From: codebien <2103732+codebien@users.noreply.github.com> Date: Wed, 30 Aug 2023 17:45:48 +0200 Subject: [PATCH 2/3] fixup! cloudv2: Higher resolution for Histogram --- output/cloud/expv2/hdr.go | 11 +++++------ output/cloud/expv2/hdr_test.go | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go index 9493db1bd26..2aa70689d0f 100644 --- a/output/cloud/expv2/hdr.go +++ b/output/cloud/expv2/hdr.go @@ -28,8 +28,7 @@ const ( // of metrics samples' values as histogram. // // The histogram is the representation of base-2 exponential Histogram with two layers. 
-// The first layer has primary buckets in the form of a power of two, and a second layer of buckets -// for each primary bucket with an equally distributed amount of buckets inside. +// The first layer has primary buckets in the form of a power of two, and a second layer of buckets for each primary bucket with an equally distributed amount of buckets inside. // // The histogram has a series of (N * 2^m) buckets, where: // N = a power of 2 that defines the number of primary buckets @@ -91,14 +90,14 @@ func (h *histogram) addToBucket(v float64) { v /= h.MinimumResolution - if v > math.MaxInt64 { - h.ExtraHighBucket++ - return - } if v < lowestTrackable { h.ExtraLowBucket++ return } + if v > math.MaxInt64 { + h.ExtraHighBucket++ + return + } h.Buckets[resolveBucketIndex(v)]++ } diff --git a/output/cloud/expv2/hdr_test.go b/output/cloud/expv2/hdr_test.go index 129a7a24783..3950416e4ad 100644 --- a/output/cloud/expv2/hdr_test.go +++ b/output/cloud/expv2/hdr_test.go @@ -46,9 +46,10 @@ func TestResolveBucketIndex(t *testing.T) { {in: 1 << 40, exp: 4352}, {in: 1 << 62, exp: 7168}, - {in: math.MaxInt32, exp: 3199}, // 2B - {in: math.MaxUint32, exp: 3327}, // 4B - {in: math.MaxInt64, exp: 7296}, // Huge number // 9.22...e+18 + {in: math.MaxInt32, exp: 3199}, // 2B + {in: math.MaxUint32, exp: 3327}, // 4B + {in: math.MaxInt64, exp: 7296}, // Huge number // 9.22...e+18 + {in: math.MaxInt64 + 2000, exp: 7296}, // Assert that it does not overflow } for _, tc := range tests { assert.Equal(t, int(tc.exp), int(resolveBucketIndex(tc.in)), tc.in) @@ -144,7 +145,7 @@ func TestHistogramAddWithUntrackables(t *testing.T) { h := newHistogram() h.MinimumResolution = 1.0 - for _, v := range []float64{5, -3.14, 1<<62 + 1, 1} { + for _, v := range []float64{5, -3.14, math.MaxInt64 + 3239, 1} { h.Add(v) } @@ -152,9 +153,9 @@ func TestHistogramAddWithUntrackables(t *testing.T) { Buckets: map[uint32]uint32{1: 1, 5: 1}, ExtraLowBucket: 1, ExtraHighBucket: 1, - Max: 1 << 62, + Max: 
9223372036854779046, Min: -3.14, - Sum: 1<<62 + 1 + 5 + 1 - 3.14, + Sum: math.MaxInt64 + 3239 + 5 + 1 - 3.14, Count: 4, MinimumResolution: 1.0, } @@ -283,7 +284,7 @@ func TestHistogramAsProto(t *testing.T) { }, { name: "UntrackableValues", - vals: []float64{-0.23, 1<<62 + 1}, + vals: []float64{-0.23, 1<<64 - 1}, exp: &pbcloud.TrendHdrValue{ ExtraLowValuesCounter: uint32ptr(1), ExtraHighValuesCounter: uint32ptr(1), @@ -291,8 +292,8 @@ func TestHistogramAsProto(t *testing.T) { Spans: nil, Count: 2, MinValue: -0.23, - MaxValue: 1<<62 + 1, - Sum: (1 << 62) + 1 - 0.23, + MaxValue: 1<<64 - 1, + Sum: (1 << 64) - 1 - 0.23, }, }, { From b978d8734c71f85bed6bc7f1108769e487a79000 Mon Sep 17 00:00:00 2001 From: codebien <2103732+codebien@users.noreply.github.com> Date: Wed, 30 Aug 2023 18:07:14 +0200 Subject: [PATCH 3/3] fixup! fixup! cloudv2: Higher resolution for Histogram --- output/cloud/expv2/hdr.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go index 2aa70689d0f..b0afa29c4e7 100644 --- a/output/cloud/expv2/hdr.go +++ b/output/cloud/expv2/hdr.go @@ -28,7 +28,8 @@ const ( // of metrics samples' values as histogram. // // The histogram is the representation of base-2 exponential Histogram with two layers. -// The first layer has primary buckets in the form of a power of two, and a second layer of buckets for each primary bucket with an equally distributed amount of buckets inside. +// The first layer has primary buckets in the form of a power of two, and a second layer of buckets +// for each primary bucket with an equally distributed amount of buckets inside. // // The histogram has a series of (N * 2^m) buckets, where: // N = a power of 2 that defines the number of primary buckets