Skip to content

Commit

Permalink
mkbench: add write-throughput benchmark data parser
Browse files Browse the repository at this point in the history
To parse raw write-throughput benchmark data produced from the nightly
runs, add the `write` subcommand to the `mkbench` command.

Raw data is read from log files generated by the worker VMs
participating in a benchmark workload run (a workload is a particular
benchmark configuration, e.g. values of size 1024B). Raw data is of the
form:

```console
BenchmarkRawwrite/values=1024 30000 ops/sec true pass 1m0s elapsed 2413192409 bytes 4 levels
BenchmarkRawwrite/values=1024 30094 ops/sec true pass 2m1s elapsed 4384425494 bytes 4 levels
BenchmarkRawwrite/values=1024 30269 ops/sec true pass 3m1s elapsed 6248072011 bytes 4 levels
...
```

Data from each "raw run" for a given day is combined to produce a "run",
which can then be summarized by taking the average over all raw runs for
the day, similar to how the `ycsb` subcommand functions.

A top-level summary file is output that contains a mapping from
benchmark workload name to the daily, summarized average ops/sec
sustainable throughput figure.

Each daily figure reported in the top-level summary file also points to
a summary file for the daily run that contains the raw data from all
worker VMs running the benchmark. This allows the data visualization to
present a top-level figure, but also provide a "drill-down" into the
daily runs to observe how the ops/sec figure trended over the course of
the benchmark.
  • Loading branch information
nicktrav committed Nov 8, 2021
1 parent 32634c4 commit c0661ae
Show file tree
Hide file tree
Showing 14 changed files with 864 additions and 138 deletions.
94 changes: 2 additions & 92 deletions cmd/pebble/write_bench.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package main
import (
"context"
"fmt"
"sort"
"sync"
"time"

Expand Down Expand Up @@ -391,12 +390,8 @@ func runWriteBenchmark(_ *cobra.Command, args []string) error {
y.reg.Tick(func(tick histogramTick) {
total = tick.Cumulative.TotalCount()
})
split := findOptimalSplit(pass, fail, writeBenchConfig.incBase/2 /* resolution */)
fmt.Println("___elapsed___ops(total)___split(ops/sec)")
fmt.Printf("%10s %12d %16d\n", elapsed.Truncate(time.Second), total, split)

// Print benchmark summary line.
fmt.Printf("BenchmarkSummary%s %d ops/sec\n", name, split)
fmt.Println("___elapsed___ops(total)")
fmt.Printf("%10s %12d\n", elapsed.Truncate(time.Second), total)
},
})

Expand Down Expand Up @@ -472,88 +467,3 @@ func (w *pauseWriter) unpause() {
func (w *pauseWriter) setRate(r float64) {
w.limiter.SetLimit(rate.Limit(r))
}

// findOptimalSplit computes and returns a value that separates the given pass
// and fail measurements optimally, such that the number of mis-classified
// passes (pass values that fall above the split) and fails (fail values that
// fall below the split) is minimized.
//
// The following gives a visual representation of the problem:
//
//                                  Optimal partition (=550) -----> |
// Passes:             o          o        o       o      o o o oo  |
// Fails:                                   x             x         |x    x  x     x x        x
//           |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
//           0         100       200       300       400       500  |    600       700       800       900
//                                                                  |
//
// The algorithm works by computing the error (i.e. the number of
// mis-classifications) at candidate points along the x-axis, starting at the
// smallest pass value and stepping by inc up to the largest fail value. If
// several candidates share the lowest error, the midpoint (integer division)
// between the smallest and the largest such candidate is returned.
//
// The inputs are not modified. An inc smaller than 1 is treated as 1, as a
// non-positive step would never advance the scan.
//
// -1 is returned when no sensible split exists: either slice is empty, or
// every pass value is larger than every fail value (there are then no
// candidate points to evaluate).
func findOptimalSplit(pass, fail []int, inc int) int {
	// Not enough data to compute a sensible score.
	if len(pass) == 0 || len(fail) == 0 {
		return -1
	}

	// Guard against a scan that never terminates.
	if inc < 1 {
		inc = 1
	}

	// Work on sorted copies so that the caller's slices are left untouched.
	p := append([]int(nil), pass...)
	f := append([]int(nil), fail...)
	sort.Ints(p)
	sort.Ints(f)

	// The scan covers [smallest pass, largest fail]. If that range is empty
	// the measurements are inverted and there is nothing to evaluate.
	lo, hi := p[0], f[len(f)-1]
	if lo > hi {
		return -1
	}

	// pi counts the passes that are <= x (correctly classified); every pass
	// beyond that index is still mis-classified. fi counts the fails that are
	// < x (mis-classified). Both only ever grow as x increases.
	pi, fi := 0, 0

	// Track the lowest error seen so far and the first / last x that hit it.
	best := -1
	splitMin, splitMax := lo, lo
	for x := lo; x <= hi; x += inc {
		for pi < len(p) && p[pi] <= x {
			pi++
		}
		for fi < len(f) && f[fi] < x {
			fi++
		}

		score := (len(p) - pi) + fi
		switch {
		case best < 0 || score < best:
			best, splitMin, splitMax = score, x, x
		case score == best:
			// x is increasing, so this is the largest x with the best score.
			splitMax = x
		}
	}

	// If more than one candidate achieved the best score, split the
	// difference between the smallest and the largest.
	return (splitMin + splitMax) / 2
}
12 changes: 5 additions & 7 deletions internal/mkbench/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,20 @@ import (
var rootCmd = &cobra.Command{
Use: "mkbench",
Short: "pebble benchmark data tools",
// For backwards compatibility, parse YCSB data if no subcommand is
// specified.
// TODO(travers): Remove this after updating the call site in the
// nightly-pebble script in cockroach.
RunE: ycsbCmd.RunE,
}

func init() {
y := getYCSBCommand()
rootCmd.AddCommand(getYCSBCommand())
rootCmd.AddCommand(getWriteCommand())
rootCmd.SilenceUsage = true
rootCmd.AddCommand(ycsbCmd)

// For backwards compatibility, the YCSB command is run, with the same
// flags, if a subcommand is not specified.
// TODO(travers): Remove this after updating the call site in the
// nightly-pebble script in cockroach.
initYCSBCmd(rootCmd)
*rootCmd.Flags() = *y.Flags()
rootCmd.RunE = y.RunE
}

func main() {
Expand Down
90 changes: 90 additions & 0 deletions internal/mkbench/split.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package main

import "sort"

// increment is the step size, in ops/sec, between the candidate split points
// evaluated by findOptimalSplit.
const increment = 50 // ops/sec

// findOptimalSplit computes and returns a value that separates the given pass
// and fail measurements optimally, such that the number of mis-classified
// passes (pass values that fall above the split) and fails (fail values that
// fall below the split) is minimized.
//
// The following gives a visual representation of the problem:
//
//                                  Optimal partition (=550) -----> |
// Passes:             o          o        o       o      o o o oo  |
// Fails:                                   x             x         |x    x  x     x x        x
//           |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
//           0         100       200       300       400       500  |    600       700       800       900
//                                                                  |
//
// The algorithm works by computing the error (i.e. the number of
// mis-classifications) at candidate points along the x-axis, starting at the
// smallest pass value and stepping by increment up to the largest fail value.
// If several candidates share the lowest error, the midpoint (integer
// division) between the smallest and the largest such candidate is returned.
//
// The inputs are not modified.
//
// -1 is returned when no sensible split exists: either slice is empty, or
// every pass value is larger than every fail value (there are then no
// candidate points to evaluate).
func findOptimalSplit(pass, fail []int) int {
	// Not enough data to compute a sensible score.
	if len(pass) == 0 || len(fail) == 0 {
		return -1
	}

	// Work on sorted copies so that the caller's slices are left untouched.
	p := append([]int(nil), pass...)
	f := append([]int(nil), fail...)
	sort.Ints(p)
	sort.Ints(f)

	// The scan covers [smallest pass, largest fail]. If that range is empty
	// the measurements are inverted and there is nothing to evaluate.
	lo, hi := p[0], f[len(f)-1]
	if lo > hi {
		return -1
	}

	// pi counts the passes that are <= x (correctly classified); every pass
	// beyond that index is still mis-classified. fi counts the fails that are
	// < x (mis-classified). Both only ever grow as x increases.
	pi, fi := 0, 0

	// Track the lowest error seen so far and the first / last x that hit it.
	best := -1
	splitMin, splitMax := lo, lo
	for x := lo; x <= hi; x += increment {
		for pi < len(p) && p[pi] <= x {
			pi++
		}
		for fi < len(f) && f[fi] < x {
			fi++
		}

		score := (len(p) - pi) + fi
		switch {
		case best < 0 || score < best:
			best, splitMin, splitMax = score, x, x
		case score == best:
			// x is increasing, so this is the largest x with the best score.
			splitMax = x
		}
	}

	// If more than one candidate achieved the best score, split the
	// difference between the smallest and the largest.
	return (splitMin + splitMax) / 2
}
15 changes: 4 additions & 11 deletions cmd/pebble/write_bench_test.go → internal/mkbench/split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,36 @@ import (
func TestFindOptimalSplit(t *testing.T) {
testCases := []struct {
passes, fails []int
interval int
want int
}{
{
// Not enough data.
passes: []int{},
fails: []int{},
interval: 50,
want: -1,
},
{
// Not enough data.
passes: []int{1, 2, 3},
fails: []int{},
interval: 50,
want: -1,
},
{
// Not enough data.
passes: []int{},
fails: []int{1, 2, 3},
interval: 50,
want: -1,
},
{
// Trivial example.
passes: []int{10},
fails: []int{20},
interval: 5,
want: 15,
passes: []int{100},
fails: []int{200},
want: 150,
},
{
// Example given in the doc comment for the function.
passes: []int{100, 210, 300, 380, 450, 470, 490, 510, 520},
fails: []int{310, 450, 560, 610, 640, 700, 720, 810},
interval: 50,
want: 550,
},
{
Expand All @@ -65,14 +59,13 @@ func TestFindOptimalSplit(t *testing.T) {
33200, 33200, 33200, 33200, 33100, 33300, 33100, 33100, 33000,
39200, 36100,
},
interval: 50,
want: 33100,
},
}

for _, tc := range testCases {
t.Run("", func(t *testing.T) {
split := findOptimalSplit(tc.passes, tc.fails, tc.interval)
split := findOptimalSplit(tc.passes, tc.fails)
require.Equal(t, tc.want, split)
})
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit c0661ae

Please sign in to comment.