Skip to content

Commit

Permalink
Add a format for object-size providing a weighted random based on co…
Browse files Browse the repository at this point in the history
…nfigured buckets (#342)

Allows a comma-separated set of colon-separated pairs, describing buckets
and their respective weights. This format triggers an option that
performs a weighted random number generation when a new object is
created.

E.g.: `4096:10740,8192:1685,16384:1623` will trigger objects whose size
will be chosen between 0 and 4096 with a weight of 10740, between 4096
and 8192 with a weight of 1685, or between 8192 and 16384 with a weight
of 1623.
  • Loading branch information
jfsmig authored Oct 23, 2024
1 parent 94f1951 commit 316d79c
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 23 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ By default warp uploads random data.

### Object Size

#### Fixed File Size

Most benchmarks use the `--obj.size` parameter to decide the size of objects to upload.

Different benchmark types will have different default values.
Expand Down Expand Up @@ -241,6 +243,20 @@ The average object size will be close to `--obj.size` multiplied by 0.179151.

To get a value for `--obj.size` multiply the desired average object size by 5.582 to get a maximum value.

#### Bucketed File Size

The `--obj.size` parameter accepts a string value whose format can describe size buckets.
Using that format activates bucketed file sizes and takes precedence over
random file sizes enabled via `--obj.randsize`.

The format of the string is a comma-separated list of colon-separated pairs, describing buckets and their respective weights.
Within each bucket, the size distribution is uniform.

E.g.: the value `4096:10740,8192:1685,16384:1623` will trigger objects whose size will be chosen
between 0 and 4096 with a weight of 10740, between 4096 and 8192 with a weight of 1685,
or between 8192 and 16384 with a weight of 1623.


## Automatic Termination
Adding `--autoterm` parameter will enable automatic termination when results are considered stable.
To detect a stable setup, warp continuously downsamples the current data to
Expand Down
50 changes: 31 additions & 19 deletions cli/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (

"github.com/minio/cli"
"github.com/minio/warp/pkg/generator"

hist "github.com/jfsmig/prng/histogram"
)

var genFlags = []cli.Flag{
Expand Down Expand Up @@ -83,28 +85,38 @@ func newGenSource(ctx *cli.Context, sizeField string) func() generator.Source {
generator.WithCustomPrefix(ctx.String("prefix")),
generator.WithPrefixSize(prefixSize),
}
tokens := strings.Split(ctx.String(sizeField), ",")
switch len(tokens) {
case 1:
size, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid obj.size specified")
}
opts = append(opts, generator.WithSize(int64(size)))
case 2:
minSize, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid min obj.size specified")
if strings.IndexRune(ctx.String(sizeField), ':') > 0 {
if _, err := hist.ParseCSV(ctx.String(sizeField)); err != nil {
fatalIf(probe.NewError(err), "Invalid histogram format for the size parameter")
} else {
opts = append(opts, generator.WithSizeHistograms(ctx.String(sizeField)))
}
maxSize, err := toSize(tokens[1])
if err != nil {
fatalIf(probe.NewError(err), "Invalid max obj.size specified")
} else {
tokens := strings.Split(ctx.String(sizeField), ",")
switch len(tokens) {
case 1:
size, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid obj.size specified")
}
opts = append(opts, generator.WithSize(int64(size)))
case 2:
minSize, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid min obj.size specified")
}
maxSize, err := toSize(tokens[1])
if err != nil {
fatalIf(probe.NewError(err), "Invalid max obj.size specified")
}
opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize)))
default:
fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter")
}
opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize)))
default:
fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter")

opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...)
}
opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...)

src, err := generator.NewFn(opts...)
fatalIf(probe.NewError(err), "Unable to create data generator")
return src
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/dustin/go-humanize v1.0.1
github.com/fatih/color v1.17.0
github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/jfsmig/prng v0.0.2
github.com/klauspost/compress v1.17.9
github.com/minio/cli v1.24.2
github.com/minio/madmin-go/v3 v3.0.51
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xG
github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0sr5D8LolXHqAAOfPw9v/RIRHl4=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/jfsmig/prng v0.0.2 h1:aZun+YgmBnUyhqvI+EDjwmOYc1kCPsihdEr9V/1YlGA=
github.com/jfsmig/prng v0.0.2/go.mod h1:bz1fX1aizp8/Lu1thLzfirh5uExjC1lVwB8SSt6ExpE=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
Expand Down
29 changes: 25 additions & 4 deletions pkg/generator/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ package generator
import (
"errors"
"math/rand"

hist "github.com/jfsmig/prng/histogram"
)

// Options provides options.
Expand All @@ -33,6 +35,10 @@ type Options struct {
totalSize int64
randomPrefix int
randSize bool

// Activates the use of a distribution of sizes
flagSizesDistribution bool
sizesDistribution hist.Int64Distribution
}

// OptionApplier allows to abstract generator options.
Expand All @@ -42,6 +48,9 @@ type OptionApplier interface {

// getSize will return a size for an object.
func (o Options) getSize(rng *rand.Rand) int64 {
if o.flagSizesDistribution {
return o.sizesDistribution.Poll(rng)
}
if !o.randSize {
return o.totalSize
}
Expand All @@ -59,20 +68,32 @@ func defaultOptions() Options {
return o
}

// WithSizeHistograms enables weighted, bucketed object sizes.
//
// encoded is handed to hist.ParseCSV as-is; the format is a comma-separated
// list of colon-separated pairs describing buckets and their weights, e.g.
// "4096:10740,8192:1685,16384:1623".
//
// The returned Option fails with the parser's error when encoded cannot be
// parsed, in which case the Options are left untouched. On success the
// distribution is stored and flagged as active, which makes getSize poll it
// ahead of the fixed and random size settings.
func WithSizeHistograms(encoded string) Option {
	return func(o *Options) error {
		// Parse into a local so a failed parse never clobbers a distribution
		// that an earlier option may already have installed.
		dist, err := hist.ParseCSV(encoded)
		if err != nil {
			return err
		}
		o.sizesDistribution = dist
		o.flagSizesDistribution = true
		return nil
	}
}

// WithMinMaxSize sets the min and max size of the generated data.
func WithMinMaxSize(min, max int64) Option {
return func(o *Options) error {
if min <= 0 {
return errors.New("WithSize: minSize must be >= 0")
return errors.New("WithMinMaxSize: minSize must be >= 0")
}
if max < 0 {
return errors.New("WithSize: maxSize must be > 0")
return errors.New("WithMinMaxSize: maxSize must be > 0")
}
if min > max {
return errors.New("WithSize: minSize must be < maxSize")
return errors.New("WithMinMaxSize: minSize must be < maxSize")
}
if o.randSize && max < 256 {
return errors.New("WithSize: random sized objects should be at least 256 bytes")
return errors.New("WithMinMaxSize: random sized objects should be at least 256 bytes")
}

o.totalSize = max
Expand Down

0 comments on commit 316d79c

Please sign in to comment.