diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3bf470a..846b8d0 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.16', '1.17', '1.18', '1.19', '1.20', '1.21', '1.22', 'tip'] + go: ['1.18', '1.19', '1.20', '1.21', '1.22', 'tip'] steps: - name: Checkout uses: actions/checkout@v3 @@ -21,11 +21,7 @@ jobs: uses: actions/setup-go@v4 with: go-version: ${{ matrix.go }} - - name: Downgrade go.mod for go 1.17 and go 1.16 - if: matrix.go == '1.17' || matrix.go == '1.16' - run: make go/mod_16_for_testing - name: Run go/mod - if: matrix.go != '1.17' && matrix.go != '1.16' run: make go/mod && git diff --exit-code - name: Install Go stable if: matrix.go == 'tip' diff --git a/Makefile b/Makefile index abbaf43..7c2167d 100644 --- a/Makefile +++ b/Makefile @@ -15,8 +15,3 @@ go/mod: cd godeltaprof/ && GO111MODULE=on go mod download cd godeltaprof/ && GO111MODULE=on go mod tidy -.PHONY: go/mod_16_for_testing -go/mod_16_for_testing: - rm -rf godeltaprof/compat/go.mod godeltaprof/compat/go.sum godeltaprof/go.mod godeltaprof/go.sum go.work otelpyroscope/ - cat go.mod_go16_test.txt > go.mod - go mod tidy diff --git a/go.mod b/go.mod index c6ab79c..4fe966d 100644 --- a/go.mod +++ b/go.mod @@ -6,4 +6,4 @@ replace github.com/grafana/pyroscope-go/godeltaprof => ./godeltaprof require github.com/grafana/pyroscope-go/godeltaprof v0.1.6 -require github.com/klauspost/compress v1.17.3 // indirect +require github.com/klauspost/compress v1.17.8 // indirect diff --git a/go.mod_go16_test.txt b/go.mod_go16_test.txt deleted file mode 100644 index 0f0f0dc..0000000 --- a/go.mod_go16_test.txt +++ /dev/null @@ -1,12 +0,0 @@ -module github.com/grafana/pyroscope-go - -go 1.16 - -require ( - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/google/pprof v0.0.0-20231127191134-f3a68a39ae15 - github.com/stretchr/testify v1.7.0 - golang.org/x/mod v0.14.0 // indirect - golang.org/x/tools v0.12.0 - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/go.sum b/go.sum index 0d8e8f5..734f917 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,2 @@ -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= diff --git a/go.work.sum b/go.work.sum index e663de3..2922f3a 100644 --- a/go.work.sum +++ b/go.work.sum @@ -6,17 +6,12 @@ github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3I github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk= github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -27,7 +22,6 @@ github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go index e9305e4..b5923e3 100644 --- a/godeltaprof/compat/compression_test.go +++ b/godeltaprof/compat/compression_test.go @@ -2,33 +2,27 @@ package compat import ( "io" - "math/rand" "runtime" "testing" - - "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" ) func BenchmarkHeapCompression(b *testing.B) { - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: true, - LazyMapping: true, - } - dh := new(pprof.DeltaHeapProfiler) - fs := generateMemProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - objSize := fs[0].AllocBytes / fs[0].AllocObjects - nMutations := int(uint(rng.Int63())) % len(fs) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) + b.ResetTimer() for i := 0; i < b.N; i++ { - _ = WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate)) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].AllocObjects += 1 - fs[idx].AllocBytes += objSize - fs[idx].FreeObjects += 1 - fs[idx].FreeBytes += objSize + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate)) + h.mutate(nmutations, fs) } } @@ -43,38 +37,25 @@ func BenchmarkMutexCompression(b *testing.B) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: true, - LazyMapping: true, - } - dh := new(pprof.DeltaMutexProfiler) - fs := generateBlockProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - nMutations := int(uint(rng.Int63())) % len(fs) - oneBlockCycles := fs[0].Cycles / fs[0].Count + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) b.ResetTimer() for i := 0; i < b.N; i++ { - _ = PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].Count += 1 - fs[idx].Cycles += oneBlockCycles + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs) + h.mutate(nmutations, fs) } }) } } - -func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error { - stc := pprof.HeapProfileConfig(rate) - b := pprof.NewProfileBuilder(w, opt, stc) - return dp.WriteHeapProto(b, p, rate) -} - -func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error { - stc := pprof.MutexProfileConfig() - b := pprof.NewProfileBuilder(w, opt, stc) - return d.PrintCountCycleProfile(b, scaler, records) -} diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go index 1dded69..2822ef4 100644 --- a/godeltaprof/compat/delta_test.go +++ b/godeltaprof/compat/delta_test.go @@ -1,13 +1,14 @@ package compat import ( - "bytes" - "math/rand" - "runtime" - "testing" - + "fmt" + gprofile "github.com/google/pprof/profile" "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" "github.com/stretchr/testify/assert" + "reflect" + "runtime" + "strings" + "testing" ) var ( @@ -15,14 +16,51 @@ var ( stack0Marker string stack1 = [32]uintptr{} stack1Marker string + stack2 = [32]uintptr{} + stack2Marker string + stack3 = [32]uintptr{} + stack4 = [32]uintptr{} ) func init() { - fs := getFunctionPointers() - stack0 = [32]uintptr{fs[0], fs[1]} - stack1 = [32]uintptr{fs[2], fs[3]} - stack0Marker = runtime.FuncForPC(fs[1]).Name() + ";" + runtime.FuncForPC(fs[0]).Name() - stack1Marker = runtime.FuncForPC(fs[3]).Name() + ";" + runtime.FuncForPC(fs[2]).Name() + stack0 = [32]uintptr{ + reflect.ValueOf(assert.Truef).Pointer(), + reflect.ValueOf(assert.CallerInfo).Pointer(), + } + stack1 = [32]uintptr{ + reflect.ValueOf(assert.Condition).Pointer(), + reflect.ValueOf(assert.Conditionf).Pointer(), + } + stack2 = [32]uintptr{ + reflect.ValueOf(runtime.GC).Pointer(), + reflect.ValueOf(runtime.FuncForPC).Pointer(), + reflect.ValueOf(TestDeltaBlockProfile).Pointer(), + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + stack3 = [32]uintptr{ // equal , but difference in runtime + reflect.ValueOf(runtime.GC).Pointer() + 1, + reflect.ValueOf(runtime.FuncForPC).Pointer(), + reflect.ValueOf(TestDeltaBlockProfile).Pointer(), + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + + stack4 = [32]uintptr{ // equal , but difference in non runtime frame + reflect.ValueOf(runtime.GC).Pointer(), + reflect.ValueOf(runtime.FuncForPC).Pointer(), + reflect.ValueOf(TestDeltaBlockProfile).Pointer() + 1, + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + marker := func(stk []uintptr) string { + res := []string{} + for i := range stk { + f := stk[len(stk)-1-i] + res = append(res, runtime.FuncForPC(f).Name()) + } + return strings.Join(res, ";") + } + stack0Marker = marker(stack0[:2]) + stack1Marker = marker(stack1[:2]) + stack2Marker = marker(stack2[2:4]) } func TestDeltaHeap(t *testing.T) { @@ -40,56 +78,41 @@ func TestDeltaHeap(t *testing.T) { const testMemProfileRate = 524288 const testObjectSize = 327680 - dh := new(pprof.DeltaHeapProfiler) - opt := new(pprof.ProfileBuilderOptions) - dump := func(r ...runtime.MemProfileRecord) *bytes.Buffer { - buf := bytes.NewBuffer(nil) - err := WriteHeapProto(dh, opt, buf, r, testMemProfileRate) - assert.NoError(t, err) - return buf - } - r := func(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord { - return runtime.MemProfileRecord{ - AllocObjects: AllocObjects, - AllocBytes: AllocBytes, - FreeBytes: FreeBytes, - FreeObjects: FreeObjects, - Stack0: s, - } - } + h := newHeapTestHelper() + h.rate = testMemProfileRate - p1 := dump( - r(0, 0, 0, 0, stack0), - r(0, 0, 0, 0, stack1), + p1 := h.dump( + h.r(0, 0, 0, 0, stack0), + h.r(0, 0, 0, 0, stack1), ) expectEmptyProfile(t, p1) - p2 := dump( - r(5, 5*testObjectSize, 0, 0, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p2 := h.dump( + h.r(5, 5*testObjectSize, 0, 0, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectStackFrames(t, p2, stack0Marker, 10, 3525422, 10, 3525422) expectStackFrames(t, p2, stack1Marker, 6, 2115253, 0, 0) for i := 0; i < 3; i++ { // if we write same data, stack0 is in use, stack1 should not be present - p3 := dump( - r(5, 5*testObjectSize, 0, 0, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p3 := h.dump( + h.r(5, 5*testObjectSize, 0, 0, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectStackFrames(t, p3, stack0Marker, 0, 0, 10, 3525422) expectNoStackFrames(t, p3, stack1Marker) } - p4 := dump( - r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p4 := h.dump( + h.r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectEmptyProfile(t, p4) - p5 := dump( - r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p5 := h.dump( + h.r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) // note, this value depends on scale order, it currently tests the current implementation, but we may change it // to alloc objects to be scale(8) - scale(5) = 17-10 = 7 @@ -98,8 +121,6 @@ func TestDeltaHeap(t *testing.T) { } func TestDeltaBlockProfile(t *testing.T) { - cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9 - for i, scaler := range mutexProfileScalers { name := "ScalerMutexProfile" if i == 1 { @@ -110,56 +131,35 @@ func TestDeltaBlockProfile(t *testing.T) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - opt := new(pprof.ProfileBuilderOptions) - - scale := func(rcount, rcycles int64) (int64, int64) { - count, nanosec := pprof.ScaleMutexProfile(scaler, rcount, float64(rcycles)/cpuGHz) - inanosec := int64(nanosec) - return count, inanosec - } - dump := func(r ...runtime.BlockProfileRecord) *bytes.Buffer { - buf := bytes.NewBuffer(nil) - err := PrintCountCycleProfile(dh, opt, buf, scaler, r) - assert.NoError(t, err) - return buf - } - r := func(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord { - return runtime.BlockProfileRecord{ - Count: count, - Cycles: cycles, - StackRecord: runtime.StackRecord{ - Stack0: s, - }, - } - } + h := newMutexTestHelper() + h.scaler = scaler - p1 := dump( - r(0, 0, stack0), - r(0, 0, stack1), + p1 := h.dump( + h.r(0, 0, stack0), + h.r(0, 0, stack1), ) expectEmptyProfile(t, p1) const cycles = 42 - p2 := dump( - r(239, 239*cycles, stack0), - r(0, 0, stack1), + p2 := h.dump( + h.r(239, 239*cycles, stack0), + h.r(0, 0, stack1), ) - count0, nanos0 := scale(239, 239*cycles) + count0, nanos0 := h.scale(239, 239*cycles) expectStackFrames(t, p2, stack0Marker, count0, nanos0) expectNoStackFrames(t, p2, stack1Marker) for j := 0; j < 2; j++ { - p3 := dump( - r(239, 239*cycles, stack0), - r(0, 0, stack1), + p3 := h.dump( + h.r(239, 239*cycles, stack0), + h.r(0, 0, stack1), ) expectEmptyProfile(t, p3) } - count1, nanos1 := scale(240, 240*cycles) - p4 := dump( - r(240, 240*cycles, stack0), + count1, nanos1 := h.scale(240, 240*cycles) + p4 := h.dump( + h.r(240, 240*cycles, stack0), ) expectStackFrames(t, p4, stack0Marker, count1-count0, nanos1-nanos0) expectNoStackFrames(t, p4, stack1Marker) @@ -168,22 +168,23 @@ func TestDeltaBlockProfile(t *testing.T) { } func BenchmarkHeapDelta(b *testing.B) { - dh := new(pprof.DeltaHeapProfiler) - fs := generateMemProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - objSize := fs[0].AllocBytes / fs[0].AllocObjects - nMutations := int(uint(rng.Int63())) % len(fs) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) builder := &noopBuilder{} + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) + b.ResetTimer() for i := 0; i < b.N; i++ { - _ = dh.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate)) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].AllocObjects += 1 - fs[idx].AllocBytes += objSize - fs[idx].FreeObjects += 1 - fs[idx].FreeBytes += objSize + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate)) + h.mutate(nmutations, fs) } } @@ -198,37 +199,98 @@ func BenchmarkMutexDelta(b *testing.B) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - fs := generateBlockProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - nMutations := int(uint(rng.Int63())) % len(fs) - oneBlockCycles := fs[0].Cycles / fs[0].Count + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) builder := &noopBuilder{} + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) b.ResetTimer() for i := 0; i < b.N; i++ { - _ = dh.PrintCountCycleProfile(builder, scaler, fs) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].Count += 1 - fs[idx].Cycles += oneBlockCycles + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = h.dp.PrintCountCycleProfile(builder, scaler, fs) + h.mutate(nmutations, fs) } }) } } -type noopBuilder struct { -} +func TestMutexDuplicates(t *testing.T) { + prev := runtime.SetMutexProfileFraction(-1) + runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(prev) -func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 { - return nil -} -func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) { + h := newMutexTestHelper() + const cycles = 42 + p := h.dump( + h.r(239, 239*cycles, stack0), + h.r(42, 42*cycles, stack1), + h.r(7, 7*cycles, stack0), + ) + expectStackFrames(t, p, stack0Marker, h.scale2(239+7, (239+7)*cycles)...) + expectStackFrames(t, p, stack1Marker, h.scale2(42, (42)*cycles)...) + + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, h.scale2(239+7, (239+7)*cycles)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, h.scale2(42, 42*cycles)...) + + p = h.dump( + h.r(239, 239*cycles, stack0), + h.r(42, 42*cycles, stack1), + h.r(7, 7*cycles, stack0), + ) + expectEmptyProfile(t, p) } -func (b *noopBuilder) Build() { +func TestHeapDuplicates(t *testing.T) { + const testMemProfileRate = 524288 + h := newHeapTestHelper() + h.rate = testMemProfileRate + const blockSize = 1024 + const blockSize2 = 2048 + p := h.dump( + h.r(239, 239*blockSize, 239, 239*blockSize, stack0), + h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0), + h.r(42, 42*blockSize, 42, 42*blockSize, stack1), + h.r(7, 7*blockSize, 7, 7*blockSize, stack0), + h.r(3, 3*blockSize, 3, 3*blockSize, stack2), + h.r(5, 5*blockSize, 5, 5*blockSize, stack3), + h.r(11, 11*blockSize, 11, 11*blockSize, stack4), + ) + pp, err := gprofile.ParseData(p.Bytes()) + assert.NoError(t, err) + scale := func(c, b int) []int64 { + c1, b1 := pprof.ScaleHeapSample(int64(c), int64(b), testMemProfileRate) + return []int64{c1, b1, 0, 0} + } + //expectStackFrames(t, p, stack0Marker, scale(239+7, (239+7)*blockSize)...) + //expectStackFrames(t, p, stack1Marker, scale(42, 42*blockSize)...) + + //printProfile(t, p) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, scale(239+7, (239+7)*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, scale(42, 42*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(3, 3*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(5, 5*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(11, 11*blockSize)...) + assert.Equal(t, 6, len(pp.Sample)) + + p = h.dump( + h.r(239, 239*blockSize, 239, 239*blockSize, stack0), + h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0), + h.r(42, 42*blockSize, 42, 42*blockSize, stack1), + h.r(7, 7*blockSize, 7, 7*blockSize, stack0), + h.r(3, 3*blockSize, 3, 3*blockSize, stack2), + h.r(5, 5*blockSize, 5, 5*blockSize, stack3), + h.r(11, 11*blockSize, 11, 11*blockSize, stack4), + ) + expectEmptyProfile(t, p) } diff --git a/godeltaprof/compat/reject_order_test.go b/godeltaprof/compat/reject_order_test.go index 9bdd658..6266b78 100644 --- a/godeltaprof/compat/reject_order_test.go +++ b/godeltaprof/compat/reject_order_test.go @@ -13,11 +13,10 @@ import ( ) func TestHeapReject(t *testing.T) { - dh := new(pprof.DeltaHeapProfiler) - opt := new(pprof.ProfileBuilderOptions) - fs := generateMemProfileRecords(512, 32, 239) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) p1 := bytes.NewBuffer(nil) - err := WriteHeapProto(dh, opt, p1, fs, int64(runtime.MemProfileRate)) + err := WriteHeapProto(h.dp, h.opt, p1, fs, int64(runtime.MemProfileRate)) assert.NoError(t, err) p1Size := p1.Len() profile, err := gprofile.Parse(p1) @@ -28,7 +27,7 @@ func TestHeapReject(t *testing.T) { t.Log("p1 size", p1Size) p2 := bytes.NewBuffer(nil) - err = WriteHeapProto(dh, opt, p2, fs, int64(runtime.MemProfileRate)) + err = WriteHeapProto(h.dp, h.opt, p2, fs, int64(runtime.MemProfileRate)) assert.NoError(t, err) p2Size := p2.Len() assert.Less(t, p2Size, 1000) @@ -41,15 +40,11 @@ func TestHeapReject(t *testing.T) { } func BenchmarkHeapRejectOrder(b *testing.B) { - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: false, - LazyMapping: true, - } - dh := &pprof.DeltaHeapProfiler{} - fs := generateMemProfileRecords(512, 32, 239) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) b.ResetTimer() for i := 0; i < b.N; i++ { - WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate)) + WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate)) } } @@ -69,11 +64,11 @@ func TestMutexReject(t *testing.T) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - opt := new(pprof.ProfileBuilderOptions) - fs := generateBlockProfileRecords(512, 32, 239) + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) p1 := bytes.NewBuffer(nil) - err := PrintCountCycleProfile(dh, opt, p1, scaler, fs) + err := PrintCountCycleProfile(h.dp, h.opt, p1, scaler, fs) assert.NoError(t, err) p1Size := p1.Len() profile, err := gprofile.Parse(p1) @@ -84,7 +79,7 @@ func TestMutexReject(t *testing.T) { t.Log("p1 size", p1Size) p2 := bytes.NewBuffer(nil) - err = PrintCountCycleProfile(dh, opt, p2, scaler, fs) + err = PrintCountCycleProfile(h.dp, h.opt, p2, scaler, fs) assert.NoError(t, err) p2Size := p2.Len() assert.Less(t, p2Size, 1000) @@ -108,16 +103,13 @@ func BenchmarkMutexRejectOrder(b *testing.B) { prevMutexProfileFraction := runtime.SetMutexProfileFraction(-1) runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: false, - LazyMapping: true, - } - dh := &pprof.DeltaMutexProfiler{} - fs := generateBlockProfileRecords(512, 32, 239) + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) b.ResetTimer() for i := 0; i < b.N; i++ { - PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs) + PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs) } }) diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go index 7051ab5..8f95c17 100644 --- a/godeltaprof/compat/stackcollapse.go +++ b/godeltaprof/compat/stackcollapse.go @@ -2,7 +2,9 @@ package compat import ( "bytes" + "fmt" "io" + "reflect" "regexp" "sort" "strings" @@ -26,6 +28,16 @@ func expectEmptyProfile(t *testing.T, buffer io.Reader) { assert.Empty(t, ls) } +func printProfile(t *testing.T, p *bytes.Buffer) { + profile, err := gprofile.Parse(bytes.NewBuffer(p.Bytes())) + require.NoError(t, err) + t.Log("==================") + for _, sample := range profile.Sample { + s := pprofSampleStackToString(sample) + t.Logf("%v %v %v\n", s, sample.Value, sample.NumLabel) + } +} + func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) { profile, err := gprofile.ParseData(buffer.Bytes()) require.NoError(t, err) @@ -34,6 +46,7 @@ func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) { } func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, values ...int64) { + fmt.Printf("expectStackFrames: %s %+v\n", sfPattern, values) profile, err := gprofile.ParseData(buffer.Bytes()) require.NoError(t, err) line := findStack(t, stackCollapseProfile(t, profile), sfPattern) @@ -45,6 +58,32 @@ func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, val } } +func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern string, expectedCount int, expectedValues ...int64) { + profile, err := gprofile.ParseData(buffer.Bytes()) + require.NoError(t, err) + cnt := 0 + samples := grepSamples(profile, samplePattern) + for _, s := range samples { + if reflect.DeepEqual(s.Value, expectedValues) { + cnt++ + } + } + assert.Equalf(t, expectedCount, cnt, "expected samples re: %s\n values: %v\n count:%d\n all samples:%+v\n", samplePattern, expectedValues, expectedCount, samples) +} + +func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sample { + rr := regexp.MustCompile(samplePattern) + var samples []*gprofile.Sample + for i := range profile.Sample { + ss := pprofSampleStackToString(profile.Sample[i]) + if !rr.MatchString(ss) { + continue + } + samples = append(samples, profile.Sample[i]) + } + return samples +} + func findStack(t *testing.T, res []stack, re string) *stack { rr := regexp.MustCompile(re) for i, re := range res { @@ -58,23 +97,9 @@ func findStack(t *testing.T, res []stack, re string) *stack { func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack { var ret []stack for _, s := range p.Sample { - var funcs []string - for i := range s.Location { - - loc := s.Location[i] - for _, line := range loc.Line { - f := line.Function - //funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line)) - funcs = append(funcs, f.Name) - } - } - for i := 0; i < len(funcs)/2; i++ { - j := len(funcs) - i - 1 - funcs[i], funcs[j] = funcs[j], funcs[i] - } - + funcs, strSample := pprofSampleStackToStrings(s) ret = append(ret, stack{ - line: strings.Join(funcs, ";"), + line: strSample, funcs: funcs, value: s.Value, }) @@ -97,11 +122,36 @@ func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack { unique = append(unique, s) } - t.Log("============= stackCollapseProfile ================") - for _, s := range unique { - t.Log(s.line, s.value) - } - t.Log("===================================================") + //t.Log("============= stackCollapseProfile ================") + //for _, s := range unique { + // t.Log(s.line, s.value) + //} + //t.Log("===================================================") return unique } + +func pprofSampleStackToString(s *gprofile.Sample) string { + _, v := pprofSampleStackToStrings(s) + return v +} + +func pprofSampleStackToStrings(s *gprofile.Sample) ([]string, string) { + var funcs []string + for i := range s.Location { + + loc := s.Location[i] + for _, line := range loc.Line { + f := line.Function + //funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line)) + funcs = append(funcs, f.Name) + } + } + for i := 0; i < len(funcs)/2; i++ { + j := len(funcs) - i - 1 + funcs[i], funcs[j] = funcs[j], funcs[i] + } + + strSample := strings.Join(funcs, ";") + return funcs, strSample +} diff --git a/godeltaprof/compat/stub_go23_test.go b/godeltaprof/compat/stub_go23_test.go index bf495a9..6c2d227 100644 --- a/godeltaprof/compat/stub_go23_test.go +++ b/godeltaprof/compat/stub_go23_test.go @@ -36,6 +36,9 @@ func TestRuntimeCyclesPerSecond(t *testing.T) { checkSignature(t, "runtime", "pprof_cyclesPerSecond", "func runtime.pprof_cyclesPerSecond() int64") + checkSignature(t, "runtime/pprof", + "pprof_cyclesPerSecond", + "func runtime/pprof.pprof_cyclesPerSecond() int64") checkSignature(t, "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof", "runtime_cyclesPerSecond", "func github.com/grafana/pyroscope-go/godeltaprof/internal/pprof.runtime_cyclesPerSecond() int64") diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go index 509ba47..be95c9e 100644 --- a/godeltaprof/compat/testdata.go +++ b/godeltaprof/compat/testdata.go @@ -1,16 +1,13 @@ package compat import ( - "go/types" + "bytes" + "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" + "github.com/stretchr/testify/assert" + "io" "math/rand" "reflect" "runtime" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "golang.org/x/tools/go/packages" ) func getFunctionPointers() []uintptr { @@ -160,12 +157,12 @@ func getFunctionPointers() []uintptr { } } -func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord { +func (h *heapTestHelper) generateMemProfileRecords(n, depth int) []runtime.MemProfileRecord { var records []runtime.MemProfileRecord - rng := rand.NewSource(int64(seed)) + fs := getFunctionPointers() for i := 0; i < n; i++ { - nobj := int(uint64(rng.Int63())) % 1000000 + nobj := int(uint64(h.rng.Int63())) % 1000000 r := runtime.MemProfileRecord{ AllocObjects: int64(nobj), AllocBytes: int64(nobj * 1024), @@ -173,53 +170,182 @@ func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord { FreeBytes: int64(nobj * 1024), } for j := 0; j < depth; j++ { - r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)] + r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)] } records = append(records, r) } return records } -func generateBlockProfileRecords(n, depth, seed int) []runtime.BlockProfileRecord { +func (h *mutexTestHelper) generateBlockProfileRecords(n, depth int) []runtime.BlockProfileRecord { var records []runtime.BlockProfileRecord - rng := rand.NewSource(int64(seed)) fs := getFunctionPointers() for i := 0; i < n; i++ { - nobj := int(uint64(rng.Int63())) % 1000000 + nobj := int(uint64(h.rng.Int63())) % 1000000 r := runtime.BlockProfileRecord{ Count: int64(nobj), Cycles: int64(nobj * 10), } for j := 0; j < depth; j++ { - r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)] + r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)] } records = append(records, r) } return records } -func getFunctions(t testing.TB, pkg string) []*types.Func { - var res []*types.Func - cfg := &packages.Config{ - Mode: packages.NeedImports | packages.NeedExportFile | packages.NeedTypes | packages.NeedSyntax, - Tests: true, +type mutexTestHelper struct { + dp *pprof.DeltaMutexProfiler + opt *pprof.ProfileBuilderOptions + scaler pprof.MutexProfileScaler + rng rand.Source +} + +func newMutexTestHelper() *mutexTestHelper { + res := &mutexTestHelper{ + dp: &pprof.DeltaMutexProfiler{}, + opt: &pprof.ProfileBuilderOptions{ + GenericsFrames: true, + LazyMapping: true, + }, + scaler: pprof.ScalerMutexProfile, + rng: rand.NewSource(239), } - pkgs, err := packages.Load(cfg, pkg) - require.NoError(t, err) - for _, p := range pkgs { - if strings.Contains(p.ID, ".test") { - continue - } - for _, name := range p.Types.Scope().Names() { - f := p.Types.Scope().Lookup(name) - - if f != nil { - ff, ok := f.(*types.Func) - if ok { - res = append(res, ff) - } - } - } + return res + +} + +func (h *mutexTestHelper) scale(rcount, rcycles int64) (int64, int64) { + cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9 + count, nanosec := pprof.ScaleMutexProfile(h.scaler, rcount, float64(rcycles)/cpuGHz) + inanosec := int64(nanosec) + return count, inanosec +} + +func (h *mutexTestHelper) scale2(rcount, rcycles int64) []int64 { + c, n := h.scale(rcount, rcycles) + return []int64{c, n} +} + +func (h *mutexTestHelper) dump(r ...runtime.BlockProfileRecord) *bytes.Buffer { + buf := bytes.NewBuffer(nil) + err := PrintCountCycleProfile(h.dp, h.opt, buf, h.scaler, r) + if err != nil { // never happens + panic(err) + } + return buf +} + +func (h *mutexTestHelper) r(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord { + return runtime.BlockProfileRecord{ + Count: count, + Cycles: cycles, + StackRecord: runtime.StackRecord{ + Stack0: s, + }, + } +} + +func (h *mutexTestHelper) mutate(nmutations int, fs []runtime.BlockProfileRecord) { + oneBlockCycles := fs[0].Cycles / fs[0].Count + for j := 0; j < nmutations; j++ { + idx := int(uint(h.rng.Int63())) % len(fs) + fs[idx].Count += 1 + fs[idx].Cycles += oneBlockCycles + } +} + +type heapTestHelper struct { + dp *pprof.DeltaHeapProfiler + opt *pprof.ProfileBuilderOptions + rate int64 + rng rand.Source +} + +func newHeapTestHelper() *heapTestHelper { + res := &heapTestHelper{ + dp: &pprof.DeltaHeapProfiler{}, + opt: &pprof.ProfileBuilderOptions{ + GenericsFrames: true, + LazyMapping: true, + }, + rng: rand.NewSource(239), + rate: int64(runtime.MemProfileRate), } return res } + +func (h *heapTestHelper) dump(r ...runtime.MemProfileRecord) *bytes.Buffer { + buf := bytes.NewBuffer(nil) + err := WriteHeapProto(h.dp, h.opt, buf, r, h.rate) + if err != nil { // never happens + panic(err) + } + return buf +} + +func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord { + return runtime.MemProfileRecord{ + AllocObjects: AllocObjects, + AllocBytes: AllocBytes, + FreeBytes: FreeBytes, + FreeObjects: FreeObjects, + Stack0: s, + } +} + +func (h *heapTestHelper) mutate(nmutations int, fs []runtime.MemProfileRecord) { + objSize := fs[0].AllocBytes / fs[0].AllocObjects + for j := 0; j < nmutations; j++ { + idx := int(uint(h.rng.Int63())) % len(fs) + fs[idx].AllocObjects += 1 + fs[idx].AllocBytes += objSize + fs[idx].FreeObjects += 1 + fs[idx].FreeBytes += objSize + } +} + +func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error { + stc := pprof.HeapProfileConfig(rate) + b := pprof.NewProfileBuilder(w, opt, stc) + return dp.WriteHeapProto(b, p, rate) +} + +func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error { + stc := pprof.MutexProfileConfig() + b := pprof.NewProfileBuilder(w, opt, stc) + return d.PrintCountCycleProfile(b, scaler, records) +} + +func dumpMemProfileRecords() []runtime.MemProfileRecord { + var p []runtime.MemProfileRecord + n, ok := runtime.MemProfile(nil, true) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to MemProfile. + p = make([]runtime.MemProfileRecord, n+50) + n, ok = runtime.MemProfile(p, true) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + return p +} + +type noopBuilder struct { +} + +func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 { + return nil +} + +func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) { + +} + +func (b *noopBuilder) Build() { + +} diff --git a/godeltaprof/go.mod b/godeltaprof/go.mod index 0fbc602..e36d42c 100644 --- a/godeltaprof/go.mod +++ b/godeltaprof/go.mod @@ -1,5 +1,5 @@ module github.com/grafana/pyroscope-go/godeltaprof -go 1.16 +go 1.18 -require github.com/klauspost/compress v1.17.3 +require github.com/klauspost/compress v1.17.8 diff --git a/godeltaprof/go.sum b/godeltaprof/go.sum index 0d8e8f5..734f917 100644 --- a/godeltaprof/go.sum +++ b/godeltaprof/go.sum @@ -1,2 +1,2 @@ -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go index 0392382..883d020 100644 --- a/godeltaprof/internal/pprof/delta_heap.go +++ b/godeltaprof/internal/pprof/delta_heap.go @@ -6,16 +6,27 @@ import ( "strings" ) +type heapPrevValue struct { + allocObjects int64 +} + +type heapAccValue struct { + allocObjects int64 + inuseObjects int64 +} + type DeltaHeapProfiler struct { - m profMap + m profMap[heapPrevValue, heapAccValue] + //todo consider adding an option to remove block size label and merge allocations of different size } // WriteHeapProto writes the current heap profile in protobuf format to w. func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProfileRecord, rate int64) error { values := []int64{0, 0, 0, 0} var locs []uint64 - for _, r := range p { - // do the delta + // deduplicate: accumulate allocObjects and inuseObjects in entry.acc for equal stacks + for i := range p { + r := &p[i] if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 { // it is a fresh bucket and it will be published after next 1-2 gc cycles continue @@ -25,17 +36,42 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf blockSize = r.AllocBytes / r.AllocObjects } entry := d.m.Lookup(r.Stack(), uintptr(blockSize)) + entry.acc.allocObjects += r.AllocObjects + entry.acc.inuseObjects += r.InUseObjects() + } + // do the delta using the accumulated values and previous values + for i := range p { + r := &p[i] + if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 { + // it is a fresh bucket and it will be published after next 1-2 gc cycles + continue + } + var blockSize int64 + if r.AllocObjects > 0 { + blockSize = r.AllocBytes / r.AllocObjects + } + entry := d.m.Lookup(r.Stack(), uintptr(blockSize)) + if entry.acc == (heapAccValue{}) { + continue + } - if (r.AllocObjects - entry.count.v1) < 0 { + allocObjects := entry.acc.allocObjects - entry.prev.allocObjects + if allocObjects < 0 { continue } - AllocObjects := r.AllocObjects - entry.count.v1 - AllocBytes := r.AllocBytes - entry.count.v2 - entry.count.v1 = r.AllocObjects - entry.count.v2 = r.AllocBytes - values[0], values[1] = scaleHeapSample(AllocObjects, AllocBytes, rate) - values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) + // allocBytes, inuseBytes is calculated as multiplication of number of objects by blockSize + // This is done to reduce the size of the map entry (i.e. heapAccValue for deduplication and + // heapPrevValue for keeping the delta). + + allocBytes := allocObjects * blockSize + entry.prev.allocObjects = entry.acc.allocObjects + inuseBytes := entry.acc.inuseObjects * blockSize + + values[0], values[1] = ScaleHeapSample(allocObjects, allocBytes, rate) + values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, inuseBytes, rate) + + entry.acc = heapAccValue{} if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 { continue @@ -70,7 +106,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf return nil } -// scaleHeapSample adjusts the data from a heap Sample to +// ScaleHeapSample adjusts the data from a heap Sample to // account for its probability of appearing in the collected // data. heap profiles are a sampling of the memory allocations // requests in a program. We estimate the unsampled value by dividing @@ -79,7 +115,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf // which samples to collect, based on the desired average collection // rate R. The probability of a sample of size S to appear in that // profile is 1-exp(-S/R). -func scaleHeapSample(count, size, rate int64) (int64, int64) { +func ScaleHeapSample(count, size, rate int64) (int64, int64) { if count == 0 || size == 0 { return 0, 0 } diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go index 0b2500a..5c177e3 100644 --- a/godeltaprof/internal/pprof/delta_mutex.go +++ b/godeltaprof/internal/pprof/delta_mutex.go @@ -4,8 +4,18 @@ import ( "runtime" ) +type mutexPrevValue struct { + count int64 + inanosec int64 +} + +type mutexAccValue struct { + count int64 + cycles int64 +} + type DeltaMutexProfiler struct { - m profMap + m profMap[mutexPrevValue, mutexAccValue] } // PrintCountCycleProfile outputs block profile records (for block or mutex profiles) @@ -19,16 +29,33 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut values := []int64{0, 0} var locs []uint64 - for _, r := range records { - count, nanosec := ScaleMutexProfile(scaler, r.Count, float64(r.Cycles)/cpuGHz) + // deduplicate: accumulate count and cycles in entry.acc for equal stacks + for i := range records { + r := &records[i] + entry := d.m.Lookup(r.Stack(), 0) + entry.acc.count += r.Count // accumulate unscaled + entry.acc.cycles += r.Cycles + } + + // do the delta using the accumulated values and previous values + for i := range records { + r := &records[i] + stk := r.Stack() + entry := d.m.Lookup(stk, 0) + accCount := entry.acc.count + accCycles := entry.acc.cycles + if accCount == 0 && accCycles == 0 { + continue + } + entry.acc = mutexAccValue{} + count, nanosec := ScaleMutexProfile(scaler, accCount, float64(accCycles)/cpuGHz) inanosec := int64(nanosec) // do the delta - entry := d.m.Lookup(r.Stack(), 0) - values[0] = count - entry.count.v1 - values[1] = inanosec - entry.count.v2 - entry.count.v1 = count - entry.count.v2 = inanosec + values[0] = count - entry.prev.count + values[1] = inanosec - entry.prev.inanosec + entry.prev.count = count + entry.prev.inanosec = inanosec if values[0] < 0 || values[1] < 0 { continue @@ -39,7 +66,7 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut // For count profiles, all stack addresses are // return PCs, which is what appendLocsForStack expects. - locs = b.LocsForStack(r.Stack()) + locs = b.LocsForStack(stk) b.Sample(values, locs, 0) } b.Build() diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go index 188001e..dcb6e56 100644 --- a/godeltaprof/internal/pprof/map.go +++ b/godeltaprof/internal/pprof/map.go @@ -8,30 +8,23 @@ import "unsafe" // A profMap is a map from (stack, tag) to mapEntry. // It grows without bound, but that's assumed to be OK. -type profMap struct { - hash map[uintptr]*profMapEntry - all *profMapEntry - last *profMapEntry - free []profMapEntry +type profMap[PREV any, ACC any] struct { + hash map[uintptr]*profMapEntry[PREV, ACC] + free []profMapEntry[PREV, ACC] freeStk []uintptr } -type count struct { - // alloc_objects, alloc_bytes for heap - // mutex_count, mutex_duration for mutex - v1, v2 int64 -} - // A profMapEntry is a single entry in the profMap. -type profMapEntry struct { - nextHash *profMapEntry // next in hash list - nextAll *profMapEntry // next in list of all entries +// todo use unsafe.Pointer + len for stk ? +type profMapEntry[PREV any, ACC any] struct { + nextHash *profMapEntry[PREV, ACC] // next in hash list stk []uintptr tag uintptr - count count + prev PREV + acc ACC } -func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry { +func (m *profMap[PREV, ACC]) Lookup(stk []uintptr, tag uintptr) *profMapEntry[PREV, ACC] { // Compute hash of (stk, tag). h := uintptr(0) for _, x := range stk { @@ -42,7 +35,7 @@ func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry { h += uintptr(tag) * 41 // Find entry if present. - var last *profMapEntry + var last *profMapEntry[PREV, ACC] Search: for e := m.hash[h]; e != nil; last, e = e, e.nextHash { if len(e.stk) != len(stk) || e.tag != tag { @@ -64,7 +57,7 @@ Search: // Add new entry. if len(m.free) < 1 { - m.free = make([]profMapEntry, 128) + m.free = make([]profMapEntry[PREV, ACC], 128) } e := &m.free[0] m.free = m.free[1:] @@ -82,15 +75,8 @@ Search: e.stk[j] = uintptr(stk[j]) } if m.hash == nil { - m.hash = make(map[uintptr]*profMapEntry) + m.hash = make(map[uintptr]*profMapEntry[PREV, ACC]) } m.hash[h] = e - if m.all == nil { - m.all = e - m.last = e - } else { - m.last.nextAll = e - m.last = e - } return e } diff --git a/godeltaprof/internal/pprof/stub_go23.go b/godeltaprof/internal/pprof/stub_go23.go index 37ccbe8..d587cad 100644 --- a/godeltaprof/internal/pprof/stub_go23.go +++ b/godeltaprof/internal/pprof/stub_go23.go @@ -23,5 +23,5 @@ func runtime_FrameSymbolName(f *runtime.Frame) string //go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr -//go:linkname runtime_cyclesPerSecond runtime.pprof_cyclesPerSecond +//go:linkname runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func runtime_cyclesPerSecond() int64