Skip to content

Commit

Permalink
planner: a better way to round scale factor when collecting TopN stats
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Dec 28, 2023
1 parent 767ec62 commit cd073ea
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
4 changes: 1 addition & 3 deletions statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,10 +419,8 @@ func BuildHistAndTopN(
}
}

for i := 0; i < len(topNList); i++ {
topNList[i].Count *= uint64(sampleFactor)
}
topn := &TopN{TopN: topNList}
topn.Scale(sampleFactor)

if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) {
// TopN includes all sample data
Expand Down
7 changes: 7 additions & 0 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,13 @@ type TopN struct {
TopN []TopNMeta
}

// Scale multiplies the Count of every entry in c.TopN by scaleFactor, in place.
//
// Each count is converted to float64, multiplied by scaleFactor, and converted
// back to uint64, so any fractional part of the product is discarded
// (truncation toward zero) rather than rounded to nearest.
//
// Calling Scale on a nil *TopN is a no-op; this mirrors the nil-receiver check
// performed by AppendTopN.
func (c *TopN) Scale(scaleFactor float64) {
	if c == nil {
		// Nothing to scale; avoid dereferencing a nil receiver.
		return
	}
	for i := range c.TopN {
		c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor)
	}
}

// AppendTopN appends a topn into the TopN struct.
func (c *TopN) AppendTopN(data []byte, count uint64) {
if c == nil {
Expand Down
20 changes: 20 additions & 0 deletions statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,23 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) {
require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55")
require.Len(t, leftTopN, 1, "should have 1 left topN")
}

// TestTopNScale verifies that TopN.Scale keeps the scaled total close to the
// exact value: for several non-integer scale factors, the relative difference
// between the scaled TotalCount and sum(counts)*factor must stay below 0.0001.
func TestTopNScale(t *testing.T) {
	const entryCount = 20
	factors := []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99}

	for _, factor := range factors {
		// Build a fresh set of random counts and remember their unscaled sum.
		entries := make([]TopNMeta, 0, entryCount)
		var unscaledTotal uint64
		for n := 0; n < entryCount; n++ {
			c := uint64(rand.Intn(100000))
			unscaledTotal += c
			entries = append(entries, TopNMeta{Count: c})
		}

		scaled := TopN{TopN: entries}
		scaled.Scale(factor)

		// Compare the scaled total against the exact floating-point product.
		expected := float64(unscaledTotal) * factor
		relErr := math.Abs(float64(scaled.TotalCount())-expected) / expected
		require.Less(t, relErr, 0.0001)
	}
}

0 comments on commit cd073ea

Please sign in to comment.