From aacc4984f2f03e697e53348ba98aec1910807c34 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 12 Apr 2023 09:56:00 +0800 Subject: [PATCH 1/2] Use binary search in `getSeriesIndex` --- pkg/phlaredb/profiles.go | 18 ++++++++++++------ pkg/pprof/pprof.go | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pkg/phlaredb/profiles.go b/pkg/phlaredb/profiles.go index 35ea341af..22d9a8584 100644 --- a/pkg/phlaredb/profiles.go +++ b/pkg/phlaredb/profiles.go @@ -41,14 +41,20 @@ type rowRangeWithSeriesIndex struct { type rowRangesWithSeriesIndex []rowRangeWithSeriesIndex func (s rowRangesWithSeriesIndex) getSeriesIndex(rowNum int64) uint32 { - // todo: binary search - for _, rg := range s { - // it is possible that the series is not existing - if rg.rowRange == nil { + l, r := 0, len(s)-1 + for l <= r { + mid := (l + r) / 2 + if s[mid].rowRange == nil { + l = mid + 1 continue } - if rg.rowNum <= rowNum && rg.rowNum+int64(rg.length) > rowNum { - return rg.seriesIndex + if s[mid].rowNum <= rowNum && s[mid].rowNum+int64(s[mid].length) > rowNum { + return s[mid].seriesIndex + } + if s[mid].rowNum > rowNum { + r = mid - 1 + } else { + l = mid + 1 } } panic("series index not found") diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 8a9f359ab..5e4deb4d2 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -182,9 +182,9 @@ func FromProfile(p *profile.Profile) (*profilev1.Profile, error) { r.Mapping = append(r.Mapping, &profilev1.Mapping{ Id: m.ID, Filename: addString(strings, m.File), - MemoryStart: (m.Start), - MemoryLimit: (m.Limit), - FileOffset: (m.Offset), + MemoryStart: m.Start, + MemoryLimit: m.Limit, + FileOffset: m.Offset, BuildId: addString(strings, m.BuildID), HasFunctions: m.HasFunctions, HasFilenames: m.HasFilenames, From 00d84e837ddd97ffa658bd8fea3d0ea586cc1f03 Mon Sep 17 00:00:00 2001 From: Christian Simon Date: Thu, 13 Apr 2023 14:45:07 +0100 Subject: [PATCH 2/2] Add a test case, which will trigger the nil row range case --- pkg/phlaredb/profile_store_test.go | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pkg/phlaredb/profile_store_test.go b/pkg/phlaredb/profile_store_test.go index 7eaa885c2..f5e88683a 100644 --- a/pkg/phlaredb/profile_store_test.go +++ b/pkg/phlaredb/profile_store_test.go @@ -79,6 +79,36 @@ func sameProfileStream(i int) *testProfile { return tp } +// This will simulate a profile stream which ends and a new one starts at i > boundary +func profileStreamEndingAndStarting(boundary int) func(int) *testProfile { + return func(i int) *testProfile { + tp := &testProfile{} + + series := "at-beginning" + if i > boundary { + series = "at-end" + } + + tp.profileName = "process_cpu:cpu:nanoseconds:cpu:nanoseconds" + tp.lbls = phlaremodel.LabelsFromStrings( + phlaremodel.LabelNameProfileType, tp.profileName, + "job", "test", + "stream", series, + ) + + tp.p.ID = uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)) + tp.p.TimeNanos = time.Second.Nanoseconds() * int64(i) + tp.p.Samples = []*schemav1.Sample{ + { + StacktraceID: 0x1, + Value: 10.0, + }, + } + tp.populateFingerprint() + return tp + } +} + func readFullParquetFile[M any](t *testing.T, path string) ([]M, uint64) { f, err := os.Open(path) require.NoError(t, err) @@ -131,6 +161,13 @@ func TestProfileStore_RowGroupSplitting(t *testing.T) { expectedNumRows: 100, values: sameProfileStream, }, + { + name: "a stream ending after half of the samples and a new one starting", + cfg: &ParquetConfig{MaxRowGroupBytes: 1828, MaxBufferRowCount: 100000}, + expectedNumRGs: 10, + expectedNumRows: 100, + values: profileStreamEndingAndStarting(50), + }, { name: "multiple row groups because of maximum row num", cfg: &ParquetConfig{MaxRowGroupBytes: 128000, MaxBufferRowCount: 10},