Skip to content

Commit

Permalink
feat: Add key cardinality testing feature (#67)
Browse files Browse the repository at this point in the history
## Which problem is this PR solving?

- We've seen issues where the full set of keys being fed to a sampler
takes longer to appear than the sampler's AdjustmentInterval. This gives
loadgen the ability to generate fields that will only cover all
possible values once the generator's sample period has completed.

So `mykey=/k50,60` will generate a field called mykey that will have
values of cardinality 50, but not all possible values will be used until
60 seconds have passed.

Includes testing and benchmark code, as well as a readme update.
  • Loading branch information
kentquirk authored Oct 22, 2024
1 parent 8b0edb1 commit c2ce691
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ more than one.
| sw | pronounceable words, rectangular distribution | cardinality (16)||
| sq | pronounceable words, quadratic distribution | cardinality (16) ||
| sx | hexadecimal string | length in chars (16)||
| k | key fields used for testing intermittent key cardinality | cardinality (50) | period in seconds (60) |
| u | url-like (2 parts) | cardinality of 1st part (3) | cardinality of 2nd part (10) |
| uq | url with random query | cardinality of 1st part (3) | cardinality of 2nd part (10) |
| st | status code | percentage of 400s | percentage of 500s |
Expand All @@ -161,6 +162,7 @@ where `0` means the root span.
* 1.name=/sq9 -- name is words with cardinality 9, only on spans that are direct children of the root span
* url=/u10,10 -- simulate URLs for 10 services, each of which has 10 endpoints
* status=/st10,0.1 -- generate status codes where 10% are 400s and .1% are 500s
* samplekey=/k50,60 -- generate sample keys with cardinality 50, where the full set of keys is only seen once 60s have elapsed

## Motivation

Expand Down
113 changes: 110 additions & 3 deletions fielder.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"regexp"
"strconv"
"strings"
"time"

"github.com/dgryski/go-wyhash"
"go.opentelemetry.io/otel/attribute"
Expand Down Expand Up @@ -55,7 +56,8 @@ var nouns = []string{
// constfield matches a constant field value: a string whose first character is not '/'
var constfield = regexp.MustCompile(`^([^/].*)$`)

// genfield is used to parse generator fields by matching valid commands and numeric arguments
// capture 1 is the command (one letter plus an optional modifier letter), capture 2 is the
// first numeric argument, and capture 3 is the optional second argument
var genfield = regexp.MustCompile(`^/([ibfsu][awxrgqt]?)([0-9.-]+)?(,[0-9.-]+)?$`)
// the second parameter, if it exists, includes the comma, so the comma has to be
// stripped before the value is parsed as a number
var genfield = regexp.MustCompile(`^/([ibfsuk][awxrgqt]?)([0-9.-]+)?(,[0-9.-]+)?$`)

// keysplitter separates fields that look like number.name (ex: 1.myfield)
// capture 1 is the leading number and capture 2 is everything after the first dot
var keysplitter = regexp.MustCompile(`^([0-9]+)\.(.*$)`)
Expand All @@ -64,8 +66,8 @@ type Rng struct {
rng *rand.Rand
}

func NewRng(s string) Rng {
return Rng{rand.New(rand.NewSource(int64(wyhash.Hash([]byte(s), 2467825690))))}
// NewRng builds an Rng whose random source is seeded from the wyhash of seed
// (computed with a fixed hash seed), so the same string always yields the same
// source seed.
func NewRng(seed string) Rng {
	hashed := wyhash.Hash([]byte(seed), 2467825690)
	source := rand.NewSource(int64(hashed))
	return Rng{rand.New(source)}
}

func (r Rng) Intn(n int) int64 {
Expand All @@ -74,6 +76,9 @@ func (r Rng) Intn(n int) int64 {

// Choice returns one element of a, selected by index with r.Intn.
// An empty (or nil) slice yields the empty string.
func (r Rng) Choice(a []string) string {
	count := len(a)
	if count == 0 {
		return ""
	}
	picked := r.Intn(count)
	return a[picked]
}

Expand Down Expand Up @@ -169,6 +174,76 @@ func getWordList(rng Rng, cardinality int, source []string) []string {
return words
}

// EligibilityPeriod describes the window during which one word may be handed out.
// start and end are offsets into a repeating period; the word is eligible when
// start <= offset < end.
type EligibilityPeriod struct {
// word is the value returned while the window is open
word string
// start is the inclusive beginning of the window
start time.Duration
// end is the exclusive end of the window
end time.Duration
}

// PeriodicEligibility holds a set of per-word eligibility windows together with
// the random source used to pick among the words that are currently eligible.
type PeriodicEligibility struct {
// rng is used to pick among candidate words
rng Rng
// periods holds one eligibility window per word
periods []EligibilityPeriod
// period is the cycle length; elapsed time is reduced modulo this value before
// it is compared against the windows
period time.Duration
}

// newPeriodicEligibility generates a list of eligibility periods for a set of words.
// Each word is eligible for a stretch of time whose length shrinks with its position
// in the list: the first word is eligible for the whole period, the last one for only
// period/len(words). Each window is placed at a random offset that still lets it finish
// within the period, so that not all words are available at the same time, but
// eventually all of them are.
//
// rng supplies the random placement, words is the list of values to schedule, and
// period is the length of one full cycle.
func newPeriodicEligibility(rng Rng, words []string, period time.Duration) *PeriodicEligibility {
	cardinality := len(words)
	periods := make([]EligibilityPeriod, cardinality)
	for i, word := range words {
		// calculate a period length that is proportional to the number of remaining words
		periodLength := time.Duration(float64(period) * float64(cardinality-i) / float64(cardinality))
		// slack is how far the window can be shifted while still ending within the period
		slack := period - periodLength
		// Scale the slack in floating point before converting. Converting the random
		// fraction to a Duration first truncates it to 0, which would pin every
		// window to the start of the period.
		// NOTE(review): assumes rng.Float(0, 1) returns a value between 0 and 1 -- confirm.
		startTime := time.Duration(float64(slack) * float64(rng.Float(0, 1)))
		periods[i] = EligibilityPeriod{
			word:  word,
			start: startTime,
			end:   startTime + periodLength,
		}
	}
	return &PeriodicEligibility{
		rng:     rng,
		periods: periods,
		period:  period,
	}
}

// getEligibleWordFallback walks every eligibility window, gathers the words whose
// window contains the current offset into the period, and returns one of them
// picked with pe.rng.
// This is, on average, slower than the random probing in getEligibleWord, but the
// random approach can sometimes be very slow, so this is the fallback used after a
// few failed random attempts.
func (pe *PeriodicEligibility) getEligibleWordFallback(durationSinceStart time.Duration) string {
	offset := durationSinceStart % pe.period
	candidates := make([]string, 0, 20)
	for i := range pe.periods {
		window := &pe.periods[i]
		if offset < window.start || offset >= window.end {
			continue
		}
		candidates = append(candidates, window.word)
	}

	if len(candidates) > 0 {
		return candidates[pe.rng.Intn(len(candidates))]
	}
	// shouldn't happen, but if it does, just pick the first word
	return pe.periods[0].word
}

// getEligibleWord returns a word whose eligibility window contains the current
// offset into the period. It first probes randomly chosen windows a fixed number
// of times; if none of the probes lands on an open window it defers to
// getEligibleWordFallback, which scans every window.
func (pe *PeriodicEligibility) getEligibleWord(durationSinceStart time.Duration) string {
	// number of random probes before giving up and scanning
	const randomAttempts = 5

	offset := durationSinceStart % pe.period
	for attempt := 0; attempt < randomAttempts; attempt++ {
		candidate := pe.periods[pe.rng.Intn(len(pe.periods))]
		if offset >= candidate.start && offset < candidate.end {
			return candidate.word
		}
	}
	// use the fallback
	return pe.getEligibleWordFallback(durationSinceStart)
}

// parseUserFields expects a list of fields in the form of name=constant or name=/gen.
// See README.md for more information.
func parseUserFields(rng Rng, userfields map[string]string) (map[string]func() any, error) {
Expand Down Expand Up @@ -233,6 +308,11 @@ func parseUserFields(rng Rng, userfields map[string]string) (map[string]func() a
default:
fields[name] = func() any { return rng.String(n) }
}
case "k":
fields[name], err = getKeyGen(rng, p1, p2)
if err != nil {
return nil, fmt.Errorf("invalid key in key field %s=%s: %w", name, value, err)
}
case "u", "uq":
// Generate a URL-like string with a random path and possibly a query string
fields[name], err = getURLGen(rng, gentype, p1, p2)
Expand Down Expand Up @@ -399,6 +479,33 @@ func getURLGen(rng Rng, gentype, p1, p2 string) (func() any, error) {
}
}

// getKeyGen builds the generator for the "k" field type: a function that returns
// key words drawn from the first `cardinality` entries of nouns, where each word
// is only handed out during its own eligibility window within a repeating period.
//
// p1 is the cardinality as a decimal string (default 50 when empty). p2 is the
// period in seconds, including its leading comma (default 60 when empty or just ",").
// An error is returned when either value is not an integer, when the cardinality
// is not between 1 and len(nouns), or when the period is less than 1 second.
func getKeyGen(rng Rng, p1, p2 string) (func() any, error) {
	cardinality, period := 50, 60
	var err error
	if p1 != "" {
		cardinality, err = strconv.Atoi(p1)
		if err != nil {
			return nil, fmt.Errorf("%s is not an int", p1)
		}
	}
	// Validate the default as well as user-supplied values: slicing nouns with an
	// out-of-range cardinality panics, and an empty word list cannot be sampled.
	if cardinality < 1 {
		return nil, fmt.Errorf("cardinality %d must be at least 1", cardinality)
	}
	if cardinality > len(nouns) {
		return nil, fmt.Errorf("cardinality %d cannot be more than %d", cardinality, len(nouns))
	}
	if p2 != "" && p2 != "," {
		// p2 carries its leading comma; strip it before parsing and when reporting.
		period, err = strconv.Atoi(p2[1:])
		if err != nil {
			return nil, fmt.Errorf("%s is not an int", p2[1:])
		}
	}
	// The generator reduces elapsed time modulo the period, so zero would panic.
	if period < 1 {
		return nil, fmt.Errorf("period %d must be at least 1", period)
	}
	ep := newPeriodicEligibility(rng, nouns[:cardinality], time.Duration(period)*time.Second)
	// Eligibility is measured relative to the moment the generator was created.
	startTime := time.Now()
	return func() any { return ep.getEligibleWord(time.Since(startTime)) }, nil
}

type Fielder struct {
fields map[string]func() any
names []string
Expand Down
66 changes: 66 additions & 0 deletions fielder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package main

import (
"fmt"
"strconv"
"strings"
"testing"
"time"
)

// Test_PeriodicEligibility_checkEligible verifies two properties of
// PeriodicEligibility: a word is only ever returned while its eligibility window
// is open, and every word is returned at some point when a whole period is sampled.
// Each subtest tracks its own set of found words so neither depends on the other.
func Test_PeriodicEligibility_checkEligible(t *testing.T) {
	const period = 60 * time.Second
	words := strings.Split("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "")
	pe := newPeriodicEligibility(NewRng("hello"), words, period)

	// Record each word's window so returned words can be checked against it.
	starts := make(map[string]time.Duration, len(words))
	ends := make(map[string]time.Duration, len(words))
	for _, ep := range pe.periods {
		starts[ep.word] = ep.start
		ends[ep.word] = ep.end
	}

	// sample asks for a word at every whole second in [0, seconds), repeating for
	// up to rounds passes (stopping early once every word has been seen). It fails
	// the test if a word is returned outside its window and returns the words seen.
	sample := func(t *testing.T, seconds, rounds int) map[string]struct{} {
		t.Helper()
		found := make(map[string]struct{}, len(words))
		for round := 0; round < rounds && len(found) < len(words); round++ {
			for p := 0; p < seconds; p++ {
				at := time.Duration(p) * time.Second
				word := pe.getEligibleWord(at)
				if at < starts[word] || at >= ends[word] {
					t.Fatalf("word %q returned at %v, outside its window [%v, %v)",
						word, at, starts[word], ends[word])
				}
				found[word] = struct{}{}
			}
		}
		return found
	}

	t.Run("only some words show up for short period", func(t *testing.T) {
		const shortSeconds = 30
		found := sample(t, shortSeconds, 100)
		// Any word whose window opens after the sampled range must not appear.
		for _, w := range words {
			if starts[w] < shortSeconds*time.Second {
				continue
			}
			if _, ok := found[w]; ok {
				t.Errorf("word %q showed up before its window opens at %v", w, starts[w])
			}
		}
	})

	t.Run("all eligible words show up with full period", func(t *testing.T) {
		found := sample(t, 60, 1000)
		var missing []string
		for _, w := range words {
			if _, ok := found[w]; !ok {
				missing = append(missing, w)
			}
		}
		if len(missing) > 0 {
			t.Errorf("expected all words to be found, missing %v", missing)
		}
	})
}

// BenchmarkPeriodicEligibility times getEligibleWord for several word-list sizes
// and, for each size, at several fixed offsets into a 61-second period. Each
// combination runs as its own sub-benchmark named card_<size>_p_<offset>.
func BenchmarkPeriodicEligibility(b *testing.B) {
	const period = 61 * time.Second
	cardinalities := []int{10, 50, 200}

	for _, card := range cardinalities {
		// the words are simply the decimal strings "0" .. "card-1"
		words := make([]string, card)
		for idx := range words {
			words[idx] = strconv.Itoa(idx)
		}
		pe := newPeriodicEligibility(NewRng("hello"), words, period)

		for p := 0; p < 61; p += 10 {
			name := fmt.Sprintf("card_%02d_p_%02d", card, p)
			b.Run(name, func(b *testing.B) {
				for i := 0; i < b.N; i++ {
					pe.getEligibleWord(time.Duration(p) * time.Second)
				}
			})
		}
	}
}
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func main() {
You can specify fields to be added to each span. Each field should be specified as
FIELD=VALUE. The value can be a constant (and will be sent as the appropriate type),
or a generator function starting with /.
Allowed generators are /i, /ir, /ig, /f, /fr, /fg, /s, /sx, /sw, /b, optionally
Allowed generators are /i, /ir, /ig, /f, /fr, /fg, /s, /sx, /sw, /b, /k, optionally
followed by a single number or a comma-separated pair of numbers.
Example generators:
- /s -- alphanumeric string of length 16
Expand All @@ -172,6 +172,7 @@ func main() {
- /u -- https url-like, no query string, two path segments; default cardinality is 10/10 but can be changed like /u3,20
- /uq -- as /u above, but with query string containing a random key word with a completely random value
- /st -- an http status code by default reflecting 95% 200s, 4% 400s, 1% 500s. 400s and 500s can be changed like /st10,0.1.
- /k50,60 -- an intermittent key field with total cardinality 50, but decreasing key frequency. All keys only arrive after 60 seconds
Field names can be alphanumeric with underscores. If a field name is prefixed with
a number and a dot (e.g. 1.foo=bar) the field will only be injected into spans at
Expand Down

0 comments on commit c2ce691

Please sign in to comment.