Skip to content

Commit

Permalink
mkbench: minor improvements / refactoring / cleanup
Browse files Browse the repository at this point in the history
Ahead of a much larger change that is intended to add a parser for
write-throughput benchmark data, split out some smaller tweaks into a
separate patch to reduce noise in the follow up patch.

Use cobra for flags - this allows `mkbench` to be used in a
subcommand style when there are multiple benchmark parsers (soon to be
two). In the absence of a subcommand, run the YCSB benchmark parser. As
a sanity check, running the following command (with no args / flags)
does not result in a diff to the file:

```bash
pushd ./internal/mkbench/testdata
  # Without flags.
  go run ../

  # The same command with flags.
  go run ../ --dir data --in data.js --out data.js
popd
```

Split out test helpers and utility methods into separate files.

Improve the diff output in the `mkbench` tests, making use of the
`difflib` library that is used elsewhere.

Prefix various functions with `ycsb` to avoid name collisions with
similar functions that will be added in a subsequent patch.

Filter YCSB benchmark data in the filesystem walker - this avoids having
to read and iterate through non-YCSB data.

Add copyright notices to files in which they were previously absent.

Append newlines to generated files - a trivial cosmetic change.
  • Loading branch information
nicktrav committed Nov 4, 2021
1 parent 1c17b69 commit 32634c4
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 143 deletions.
63 changes: 51 additions & 12 deletions internal/mkbench/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,61 @@
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// mkbench is a utility for processing the raw nightly benchmark data into JSON
// data that can be visualized by docs/js/app.js. The raw data is expected to
// be stored in dated directories underneath the "data/" directory:
//
// data/YYYYMMDD/.../<file>
//
// The files are expected to be bzip2 compressed. Within each file mkbench
// looks for Go-bench-style lines of the form:
//
// Benchmark<name> %d %f ops/sec %d read %d write %f r-amp %f w-amp
//
// The output is written to "data.js". In order to avoid reading all of the raw
// data to regenerate "data.js" on every run, mkbench first reads "data.js",
// noting which days have already been processed and excluding files in those
// directories from being read. This has the additional effect of merging the
// existing "data.js" with new raw data, which avoids needing to have all of
// the raw data present to construct a new "data.js" (only the new raw data is
// necessary).
//
// The nightly Pebble benchmarks are orchestrated from the CockroachDB
// repo:
//
// https://github.com/cockroachdb/cockroach/blob/master/build/teamcity-nightly-pebble.sh
package main

import "flag"
import (
"os"

const (
defaultDir = "data"
defaultCookedFile = "data.js"
"github.com/spf13/cobra"
)

func main() {
var dir, in, out string
flag.StringVar(&dir, "dir", defaultDir, "path to data directory")
flag.StringVar(&in, "in", defaultCookedFile, "path to (possibly non-empty) input cooked data file")
flag.StringVar(&out, "out", defaultCookedFile, "path to output data file")
flag.Parse()
// rootCmd is the top-level mkbench command. Its RunE handler is taken from
// ycsbCmd, so running mkbench without a subcommand invokes the same handler
// as the ycsb subcommand.
var rootCmd = &cobra.Command{
Use: "mkbench",
Short: "pebble benchmark data tools",
// For backwards compatibility, parse YCSB data if no subcommand is
// specified.
// TODO(travers): Remove this after updating the call site in the
// nightly-pebble script in cockroach.
RunE: ycsbCmd.RunE,
}

func init() {
rootCmd.SilenceUsage = true
rootCmd.AddCommand(ycsbCmd)

// Parse the YCSB benchmark data.
ParseYCSB(dir, in, out)
// For backwards compatibility, the YCSB command is run, with the same
// flags, if a subcommand is not specified.
// TODO(travers): Remove this after updating the call site in the
// nightly-pebble script in cockroach.
initYCSBCmd(rootCmd)
}

// main executes the root command and exits with status 1 if it reports an
// error.
func main() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}
	// Cobra has already printed the error message.
	os.Exit(1)
}
2 changes: 1 addition & 1 deletion internal/mkbench/testdata/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ data = {
"ycsb/E/values=64": "20211027,218106.4,19431395098,20670111161,3.1,25.6\n20211028,229320.5,20839571074,22160767810,3.1,26.1\n",
"ycsb/F/values=1024": "20211027,26429.2,168575162808,202512196433,0.0,11.8\n20211028,26533.0,168204615605,202274816199,0.0,11.7\n",
"ycsb/F/values=64": "20211027,237870.0,160873832374,190711184710,0.0,10.8\n20211028,236735.5,160863320765,190579811705,0.0,10.9\n"
};
};
82 changes: 82 additions & 0 deletions internal/mkbench/testutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package main

import (
"bytes"
"io"
"io/ioutil"
"os"
"path/filepath"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/errors/oserror"
"github.com/pmezard/go-difflib/difflib"
)

// filesEqual compares the contents of the files at paths a and b, treating
// "\r\n" and "\n" line endings as equivalent. It returns nil when the contents
// match. Otherwise it returns an error whose message embeds a unified diff of
// the two files. An error is also returned if either file cannot be read or if
// the diff cannot be generated.
func filesEqual(a, b string) error {
	aBytes, err := ioutil.ReadFile(a)
	if err != nil {
		return err
	}
	bBytes, err := ioutil.ReadFile(b)
	if err != nil {
		return err
	}

	// Normalize newlines so that files differing only in their line endings
	// compare as equal.
	crlf, lf := []byte("\r\n"), []byte("\n")
	aBytes = bytes.Replace(aBytes, crlf, lf, -1)
	bBytes = bytes.Replace(bBytes, crlf, lf, -1)

	d, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{
		A: difflib.SplitLines(string(aBytes)),
		B: difflib.SplitLines(string(bBytes)),
	})
	if err != nil {
		// Do not report the files as equal just because the diff could not
		// be produced.
		return errors.Wrap(err, "generating diff")
	}
	if d != "" {
		return errors.Errorf("a != b\ndiff = %s", d)
	}

	return nil
}

// copyDir recursively copies the directory tree rooted at fromPath into
// toPath, recreating the directory structure and copying every file it finds.
// Directories and files are created with mode 0700. Directories that already
// exist under toPath are reused, and files that already exist are overwritten.
// The first error encountered stops the walk and is returned.
func copyDir(fromPath, toPath string) error {
	return filepath.Walk(fromPath, func(path string, info os.FileInfo, e error) error {
		if e != nil {
			return e
		}

		rel, err := filepath.Rel(fromPath, path)
		if err != nil {
			return err
		}
		dst := filepath.Join(toPath, rel)

		// Preserve the directory structure. A directory that is already
		// present is fine; any other failure (for example a missing parent)
		// is reported rather than swallowed.
		if info.IsDir() {
			if err := os.Mkdir(dst, 0700); err != nil && !oserror.IsExist(err) {
				return err
			}
			return nil
		}

		// Copy files.
		fIn, err := os.Open(path)
		if err != nil {
			return err
		}
		defer func() { _ = fIn.Close() }()

		// Truncate so that overwriting a longer, pre-existing file does not
		// leave stale trailing bytes behind.
		fOut, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0700)
		if err != nil {
			return err
		}

		if _, err := io.Copy(fOut, fIn); err != nil {
			_ = fOut.Close()
			return err
		}
		// A failed Close on a written file can mean the data was not fully
		// persisted, so surface it.
		return fOut.Close()
	})
}
18 changes: 18 additions & 0 deletions internal/mkbench/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package main

import (
"encoding/json"
"log"
)

// prettyJSON marshals v as JSON indented with tabs and returns the encoded
// bytes. If marshalling fails, the error is reported through log.Fatal, which
// terminates the process.
func prettyJSON(v interface{}) []byte {
	encoded, err := json.MarshalIndent(v, "", "\t")
	if err == nil {
		return encoded
	}
	log.Fatal(err)
	return nil
}
Loading

0 comments on commit 32634c4

Please sign in to comment.