Use binary search in getSeriesIndex (#621)
* Use binary search in `getSeriesIndex`

* Add a test case, which will trigger the nil row range case

---------

Co-authored-by: Christian Simon <simon@swine.de>
Rustin170506 and simonswine authored Apr 13, 2023
1 parent 4a5ce82 commit b1a77df
Showing 3 changed files with 52 additions and 9 deletions.
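
For orientation before the per-file diff: the sketch below is a minimal, self-contained Go rendering of the lookup this commit switches to. Instead of the previous linear scan over all series, `getSeriesIndex` now binary-searches the row ranges (ordered by starting row) and skips entries whose row range is nil, i.e. series that contributed no rows to the row group. The `rowRange` and `rowRangeWithSeriesIndex` stand-ins, the free-function form, and the example data are simplifying assumptions for illustration only; the actual change is in `pkg/phlaredb/profiles.go` below.

package main

import "fmt"

// Simplified stand-ins for the row-range index in pkg/phlaredb/profiles.go.
// Field names follow the diff below; the real package types may differ.
type rowRange struct {
    rowNum int64
    length int
}

type rowRangeWithSeriesIndex struct {
    *rowRange
    seriesIndex uint32
}

// getSeriesIndex finds the series owning rowNum by binary search over the
// ordered, non-overlapping row ranges. Entries with a nil rowRange (series
// that contributed no rows) are skipped by moving the search to the right.
func getSeriesIndex(s []rowRangeWithSeriesIndex, rowNum int64) uint32 {
    l, r := 0, len(s)-1
    for l <= r {
        mid := (l + r) / 2
        if s[mid].rowRange == nil {
            l = mid + 1
            continue
        }
        if s[mid].rowNum <= rowNum && s[mid].rowNum+int64(s[mid].length) > rowNum {
            return s[mid].seriesIndex
        }
        if s[mid].rowNum > rowNum {
            r = mid - 1
        } else {
            l = mid + 1
        }
    }
    panic("series index not found")
}

func main() {
    ranges := []rowRangeWithSeriesIndex{
        {rowRange: &rowRange{rowNum: 0, length: 50}, seriesIndex: 0},
        {rowRange: nil, seriesIndex: 1}, // series with no rows in this row group
        {rowRange: &rowRange{rowNum: 50, length: 50}, seriesIndex: 2},
    }
    fmt.Println(getSeriesIndex(ranges, 75)) // 2: the nil entry at index 1 is skipped
}

The new test case added to `profile_store_test.go` (one stream ending halfway through the samples while another starts) is what exercises the nil row range branch, per the second bullet of the commit message.
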
pkg/phlaredb/profile_store_test.go (37 additions, 0 deletions)
@@ -79,6 +79,36 @@ func sameProfileStream(i int) *testProfile {
     return tp
 }
 
+// This will simulate a profile stream which ends and a new one starts at i > boundary
+func profileStreamEndingAndStarting(boundary int) func(int) *testProfile {
+    return func(i int) *testProfile {
+        tp := &testProfile{}
+
+        series := "at-beginning"
+        if i > boundary {
+            series = "at-end"
+        }
+
+        tp.profileName = "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
+        tp.lbls = phlaremodel.LabelsFromStrings(
+            phlaremodel.LabelNameProfileType, tp.profileName,
+            "job", "test",
+            "stream", series,
+        )
+
+        tp.p.ID = uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i))
+        tp.p.TimeNanos = time.Second.Nanoseconds() * int64(i)
+        tp.p.Samples = []*schemav1.Sample{
+            {
+                StacktraceID: 0x1,
+                Value: 10.0,
+            },
+        }
+        tp.populateFingerprint()
+        return tp
+    }
+}
+
 func readFullParquetFile[M any](t *testing.T, path string) ([]M, uint64) {
     f, err := os.Open(path)
     require.NoError(t, err)
@@ -131,6 +161,13 @@ func TestProfileStore_RowGroupSplitting(t *testing.T) {
             expectedNumRows: 100,
             values: sameProfileStream,
         },
+        {
+            name: "a stream ending after half of the samples and a new one starting",
+            cfg: &ParquetConfig{MaxRowGroupBytes: 1828, MaxBufferRowCount: 100000},
+            expectedNumRGs: 10,
+            expectedNumRows: 100,
+            values: profileStreamEndingAndStarting(50),
+        },
         {
             name: "multiple row groups because of maximum row num",
             cfg: &ParquetConfig{MaxRowGroupBytes: 128000, MaxBufferRowCount: 10},
pkg/phlaredb/profiles.go (12 additions, 6 deletions)
@@ -41,14 +41,20 @@ type rowRangeWithSeriesIndex struct {
 type rowRangesWithSeriesIndex []rowRangeWithSeriesIndex
 
 func (s rowRangesWithSeriesIndex) getSeriesIndex(rowNum int64) uint32 {
-    // todo: binary search
-    for _, rg := range s {
-        // it is possible that the series is not existing
-        if rg.rowRange == nil {
+    l, r := 0, len(s)-1
+    for l <= r {
+        mid := (l + r) / 2
+        if s[mid].rowRange == nil {
+            l = mid + 1
             continue
         }
-        if rg.rowNum <= rowNum && rg.rowNum+int64(rg.length) > rowNum {
-            return rg.seriesIndex
+        if s[mid].rowNum <= rowNum && s[mid].rowNum+int64(s[mid].length) > rowNum {
+            return s[mid].seriesIndex
         }
+        if s[mid].rowNum > rowNum {
+            r = mid - 1
+        } else {
+            l = mid + 1
+        }
     }
     panic("series index not found")
pkg/pprof/pprof.go (3 additions, 3 deletions)
@@ -182,9 +182,9 @@ func FromProfile(p *profile.Profile) (*profilev1.Profile, error) {
         r.Mapping = append(r.Mapping, &profilev1.Mapping{
             Id: m.ID,
             Filename: addString(strings, m.File),
-            MemoryStart: (m.Start),
-            MemoryLimit: (m.Limit),
-            FileOffset: (m.Offset),
+            MemoryStart: m.Start,
+            MemoryLimit: m.Limit,
+            FileOffset: m.Offset,
             BuildId: addString(strings, m.BuildID),
             HasFunctions: m.HasFunctions,
             HasFilenames: m.HasFilenames,
