diff --git a/README.md b/README.md
index 4c1a3e2..57457eb 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,7 @@ more than one.
 | sw | pronounceable words, rectangular distribution | cardinality (16)||
 | sq | pronounceable words, quadratic distribution | cardinality (16) ||
 | sx | hexadecimal string | length in chars (16)||
+| k | key fields used for testing intermittent key cardinality | cardinality (50) | period (60) |
 | u | url-like (2 parts) | cardinality of 1st part (3) | cardinality of 2nd part (10) |
 | uq | url with random query | cardinality of 1st part (3) | cardinality of 2nd part (10) |
 | st | status code | percentage of 400s | percentage of 500s |
@@ -161,6 +162,7 @@ where `0` means the root span.
   * 1.name=/sq9 -- name is words with cardinality 9, only on spans that are direct children of the root span
   * url=/u10,10 -- simulate URLs for 10 services, each of which has 10 endpoints
   * status=/st10,0.1 -- generate status codes where 10% are 400s and .1% are 500s
+  * samplekey=/k50,60 -- generate sample keys with cardinality 50 but not all keys will occur before 60s
 
 ## Motivation
 
diff --git a/fielder.go b/fielder.go
index f745830..2ad6c8e 100644
--- a/fielder.go
+++ b/fielder.go
@@ -8,6 +8,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"time"
 
 	"github.com/dgryski/go-wyhash"
 	"go.opentelemetry.io/otel/attribute"
@@ -55,7 +56,8 @@ var nouns = []string{
 var constfield = regexp.MustCompile(`^([^/].*)$`)
 
 // genfield is used to parse generator fields by matching valid commands and numeric arguments
-var genfield = regexp.MustCompile(`^/([ibfsu][awxrgqt]?)([0-9.-]+)?(,[0-9.-]+)?$`)
+// the second parameter, if it exists, includes the comma
+var genfield = regexp.MustCompile(`^/([ibfsuk][awxrgqt]?)([0-9.-]+)?(,[0-9.-]+)?$`)
 
 // keysplitter separates fields that look like number.name (ex: 1.myfield)
 var keysplitter = regexp.MustCompile(`^([0-9]+)\.(.*$)`)
@@ -64,8 +66,8 @@ type Rng struct {
 	rng *rand.Rand
 }
 
-func NewRng(s string) Rng {
-	return Rng{rand.New(rand.NewSource(int64(wyhash.Hash([]byte(s), 2467825690))))}
+func NewRng(seed string) Rng {
+	return Rng{rand.New(rand.NewSource(int64(wyhash.Hash([]byte(seed), 2467825690))))}
 }
 
 func (r Rng) Intn(n int) int64 {
@@ -74,6 +76,9 @@ func (r Rng) Intn(n int) int64 {
 
 // Chooses a random element from a slice of strings.
 func (r Rng) Choice(a []string) string {
+	if len(a) == 0 {
+		return ""
+	}
 	return a[r.Intn(len(a))]
 }
 
@@ -169,6 +174,98 @@ func getWordList(rng Rng, cardinality int, source []string) []string {
 	return words
 }
 
+// EligibilityPeriod is the window, within a repeating period, during which one
+// word may be chosen. start is inclusive and end is exclusive; both are offsets
+// from the beginning of the period.
+type EligibilityPeriod struct {
+	word  string
+	start time.Duration
+	end   time.Duration
+}
+
+// PeriodicEligibility holds one EligibilityPeriod per word, the length of the
+// repeating period, and the Rng used to choose among the eligible words.
+type PeriodicEligibility struct {
+	rng     Rng
+	periods []EligibilityPeriod
+	period  time.Duration
+}
+
+// newPeriodicEligibility generates a list of eligibility periods for a set of words.
+// Each word is eligible for a length of time that is proportional to the number of
+// words remaining in the list from its position: the first word is eligible for the
+// whole period and the last for period/len(words). Each window starts at a random
+// offset that still lets it end within the period. This is so that all the words are
+// not available at the same time, but eventually all of them are.
+func newPeriodicEligibility(rng Rng, words []string, period time.Duration) *PeriodicEligibility {
+	cardinality := len(words)
+	periods := make([]EligibilityPeriod, cardinality)
+	for i := 0; i < cardinality; i++ {
+		// calculate a period length that is proportional to the number of remaining words
+		periodLength := time.Duration(float64(period) * float64(cardinality-i) / float64(cardinality))
+		// startTime is a random value that ensures the window ends before the period does.
+		// The scaling is done in float64: converting a fraction smaller than 1 to a
+		// Duration on its own truncates it to zero, which would start every window at 0.
+		startTime := time.Duration(float64(period-periodLength) * rng.Float(0, 1))
+		periods[i] = EligibilityPeriod{
+			word:  words[i],
+			start: startTime,
+			end:   startTime + periodLength,
+		}
+	}
+	return &PeriodicEligibility{
+		rng:     rng,
+		periods: periods,
+		period:  period,
+	}
+}
+
+// getEligibleWordFallback gets one word from the list of eligible words based on the
+// time since the start of the period, by scanning every window.
+// This is, on average, slower than the random selection, but the random one can sometimes
+// be very slow, so we use this as a fallback if we try randomly a few times and fail.
+// It returns "" when there are no words or the period is not positive.
+func (pe *PeriodicEligibility) getEligibleWordFallback(durationSinceStart time.Duration) string {
+	if len(pe.periods) == 0 || pe.period <= 0 {
+		// nothing to choose from, and a zero period cannot be used as a modulus
+		return ""
+	}
+	tInPeriod := durationSinceStart % pe.period
+	eligibleIndexes := make([]int, 0, 20)
+	for i, period := range pe.periods {
+		if period.start <= tInPeriod && tInPeriod < period.end {
+			eligibleIndexes = append(eligibleIndexes, i)
+		}
+	}
+
+	if len(eligibleIndexes) == 0 {
+		// shouldn't happen, but if it does, just pick the first word
+		return pe.periods[0].word
+	}
+	ix := eligibleIndexes[pe.rng.Intn(len(eligibleIndexes))]
+	return pe.periods[ix].word
+}
+
+// getEligibleWord returns a word that is eligible at durationSinceStart, taken
+// modulo the period. It returns "" when there are no words or the period is not
+// positive.
+func (pe *PeriodicEligibility) getEligibleWord(durationSinceStart time.Duration) string {
+	if len(pe.periods) == 0 || pe.period <= 0 {
+		return ""
+	}
+	tInPeriod := durationSinceStart % pe.period
+	// try 5 times to find an eligible word by picking at random
+	for i := 0; i < 5; i++ {
+		ix := pe.rng.Intn(len(pe.periods))
+		period := pe.periods[ix]
+		if period.start <= tInPeriod && tInPeriod < period.end {
+			return period.word
+		}
+	}
+	// use the fallback
+	return pe.getEligibleWordFallback(durationSinceStart)
+}
+
 // parseUserFields expects a list of fields in the form of name=constant or name=/gen.
 // See README.md for more information.
 func parseUserFields(rng Rng, userfields map[string]string) (map[string]func() any, error) {
@@ -233,6 +330,11 @@ func parseUserFields(rng Rng, userfields map[string]string) (map[string]func() a
 			default:
 				fields[name] = func() any { return rng.String(n) }
 			}
+		case "k":
+			fields[name], err = getKeyGen(rng, p1, p2)
+			if err != nil {
+				return nil, fmt.Errorf("invalid key in key field %s=%s: %w", name, value, err)
+			}
 		case "u", "uq":
 			// Generate a URL-like string with a random path and possibly a query string
 			fields[name], err = getURLGen(rng, gentype, p1, p2)
@@ -399,6 +501,46 @@ func getURLGen(rng Rng, gentype, p1, p2 string) (func() any, error) {
 	}
 }
 
+// getKeyGen returns a generator for intermittent key fields (/k). Each call to the
+// generator returns one of the first `cardinality` nouns that is eligible at the
+// time of the call, measured from the moment getKeyGen was called.
+// p1 is the cardinality (default 50) and must be between 1 and len(nouns).
+// p2 is the period in seconds (default 60), including its leading comma, and must
+// be at least 1.
+func getKeyGen(rng Rng, p1, p2 string) (func() any, error) {
+	var cardinality, period int
+	var err error
+	if p1 == "" {
+		cardinality = 50
+	} else {
+		cardinality, err = strconv.Atoi(p1)
+		if err != nil {
+			return nil, fmt.Errorf("%s is not an int", p1)
+		}
+		// genfield accepts '-', so this must be checked here: a negative value would
+		// panic when slicing nouns and zero would leave no words to choose from
+		if cardinality < 1 || cardinality > len(nouns) {
+			return nil, fmt.Errorf("cardinality %d must be between 1 and %d", cardinality, len(nouns))
+		}
+	}
+	if p2 == "" || p2 == "," {
+		period = 60
+	} else {
+		// p2 includes the leading comma, so skip it when parsing and when reporting
+		period, err = strconv.Atoi(p2[1:])
+		if err != nil {
+			return nil, fmt.Errorf("%s is not an int", p2[1:])
+		}
+		// the period is used as a modulus, so it must be positive
+		if period < 1 {
+			return nil, fmt.Errorf("period %d must be at least 1", period)
+		}
+	}
+	ep := newPeriodicEligibility(rng, nouns[:cardinality], time.Duration(period)*time.Second)
+	startTime := time.Now()
+	return func() any { return ep.getEligibleWord(time.Since(startTime)) }, nil
+}
+
 type Fielder struct {
 	fields map[string]func() any
 	names  []string
diff --git a/fielder_test.go b/fielder_test.go
new file mode 100644
index 0000000..fc2c1bd
--- /dev/null
+++ b/fielder_test.go
@@ -0,0 +1,74 @@
+package main
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+)
+
+// Test_PeriodicEligibility_checkEligible checks that only some of the words can be
+// chosen during the first half of the period, and that all of them are chosen
+// once the whole period has been sampled.
+func Test_PeriodicEligibility_checkEligible(t *testing.T) {
+	words := strings.Split("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "")
+	notFound := map[string]struct{}{}
+	for _, w := range words {
+		notFound[w] = struct{}{}
+	}
+
+	pe := newPeriodicEligibility(NewRng("hello"), words, 60*time.Second)
+	t.Run("only some words show up for short period", func(t *testing.T) {
+		for i := 0; i < 100; i++ {
+			for p := 0; p < 30; p++ {
+				word := pe.getEligibleWord(time.Duration(p) * time.Second)
+				delete(notFound, word)
+			}
+			if len(notFound) == 0 {
+				break
+			}
+		}
+		// any word whose window starts at 30s or later cannot have been chosen yet
+		if len(notFound) == 0 {
+			t.Errorf("expected some words to be not found, got none")
+		}
+	})
+
+	t.Run("all eligible words show up with full period", func(t *testing.T) {
+		// notFound carries over from the previous subtest; sample often enough that even
+		// a word that is eligible at a single sampled second is all but certain to be chosen
+		for i := 0; i < 1000; i++ {
+			for p := 0; p < 60; p++ {
+				word := pe.getEligibleWord(time.Duration(p) * time.Second)
+				delete(notFound, word)
+			}
+			if len(notFound) == 0 {
+				break
+			}
+		}
+		if len(notFound) > 0 {
+			t.Errorf("expected all words to be found, got %v", notFound)
+		}
+	})
+}
+
+// BenchmarkPeriodicEligibility measures getEligibleWord for several cardinalities
+// at several offsets into the period.
+func BenchmarkPeriodicEligibility(b *testing.B) {
+	for _, card := range []int{10, 50, 200} {
+		var words []string
+		for i := 0; i < card; i++ {
+			words = append(words, strconv.Itoa(i))
+		}
+		period := 61 * time.Second
+		pe := newPeriodicEligibility(NewRng("hello"), words, period)
+		for p := 0; p < 61; p += 10 {
+			b.Run(fmt.Sprintf("card_%02d_p_%02d", card, p), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					pe.getEligibleWord(time.Duration(p) * time.Second)
+				}
+			})
+		}
+	}
+}
diff --git a/main.go b/main.go
index 06c07f0..0fc6a76 100644
--- a/main.go
+++ b/main.go
@@ -159,7 +159,7 @@ func main() {
 	You can specify fields to be added to each span. Each field should be specified as
 	FIELD=VALUE. The value can be a constant (and will be sent as the appropriate type), or
 	a generator function starting with /.
-	Allowed generators are /i, /ir, /ig, /f, /fr, /fg, /s, /sx, /sw, /b, optionally
+	Allowed generators are /i, /ir, /ig, /f, /fr, /fg, /s, /sx, /sw, /b, /k, optionally
 	followed by a single number or a comma-separated pair of numbers.
 	Example generators:
 	- /s -- alphanumeric string of length 16
@@ -172,6 +172,7 @@ func main() {
 	- /u -- https url-like, no query string, two path segments; default cardinality is 10/10 but can be changed like /u3,20
 	- /uq -- as /u above, but with query string containing a random key word with a completely random value
 	- /st -- an http status code by default reflecting 95% 200s, 4% 400s, 1% 500s. 400s and 500s can be changed like /st10,0.1.
+	- /k50,60 -- an intermittent key field with total cardinality 50, but decreasing key frequency. All keys only arrive after 60 seconds
 
 	Field names can be alphanumeric with underscores. If a field name is prefixed with a number and a dot
 	(e.g. 1.foo=bar) the field will only be injected into spans at