// parseUintBytes is like strconv.ParseUint, but takes its input as a []byte.
// Use of this function avoids an allocation when parsing an integer out of a
// []byte.
//
// base must be 0 or within [2, 36]. When base is 0 it is inferred from the
// input: a "0x"/"0X" prefix selects 16, a leading "0" selects 8, and anything
// else selects 10. bitSize is the width of the unsigned integer the result
// must fit in; 0 means strconv.IntSize.
//
// Every error is a *strconv.NumError with Func "ParseUint" and Num set to the
// original input. The returned value is 0 for syntax and base errors and the
// maximum uint64 when the value is out of range.
//
// This function is copied from go4.org/strconv.
func parseUintBytes(s []byte, base int, bitSize int) (n uint64, err error) {
	// Keep the caller's input: a hex prefix may be stripped from s below, but
	// errors must quote exactly what was passed in. The string conversion
	// happens only on the error path.
	input := s
	fail := func(val uint64, cause error) (uint64, error) {
		return val, &strconv.NumError{Func: "ParseUint", Num: string(input), Err: cause}
	}

	if bitSize == 0 {
		bitSize = int(strconv.IntSize)
	}

	switch {
	case len(s) < 1:
		return fail(0, strconv.ErrSyntax)

	case 2 <= base && base <= 36:
		// Explicit, valid base; nothing to infer.

	case base == 0:
		// Look for octal, hex prefix.
		switch {
		case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
			base = 16
			s = s[2:]
			if len(s) < 1 {
				// "0x" with no digits after it.
				return fail(0, strconv.ErrSyntax)
			}
		case s[0] == '0':
			base = 8
		default:
			base = 10
		}

	default:
		return fail(0, errors.New("invalid base "+strconv.Itoa(base)))
	}

	const maxUint64 = 1<<64 - 1
	// cutoff is the smallest n for which n*base overflows a uint64; maxVal is
	// the largest value representable in bitSize bits.
	cutoff := cutoff64(base)
	var maxVal uint64 = 1<<uint(bitSize) - 1

	for _, d := range s {
		// Map the ASCII digit to its numeric value.
		var v byte
		switch {
		case '0' <= d && d <= '9':
			v = d - '0'
		case 'a' <= d && d <= 'z':
			v = d - 'a' + 10
		case 'A' <= d && d <= 'Z':
			v = d - 'A' + 10
		default:
			return fail(0, strconv.ErrSyntax)
		}
		if int(v) >= base {
			// A digit that is not valid in this base.
			return fail(0, strconv.ErrSyntax)
		}

		if n >= cutoff {
			// n*base overflows
			return fail(maxUint64, strconv.ErrRange)
		}
		n *= uint64(base)

		n1 := n + uint64(v)
		if n1 < n || n1 > maxVal {
			// n+v overflows
			return fail(maxUint64, strconv.ErrRange)
		}
		n = n1
	}

	return n, nil
}

// cutoff64 returns the first number n such that n*base >= 1<<64. It returns 0
// for bases below 2.
func cutoff64(base int) uint64 {
	if base < 2 {
		return 0
	}
	return (1<<64-1)/uint64(base) + 1
}
+package testkeys + +import ( + "bytes" + "fmt" + "math" + "strconv" + + "github.com/cockroachdb/pebble/internal/base" +) + +const alpha = "abcdefghijklmnopqrstuvwxyz" + +const suffixDelim = `@t` + +var maxSuffixLen = len(suffixDelim) + len(fmt.Sprintf("%d", math.MaxInt64)) + +// Comparer is the comparer for test keys generated by this package. +var Comparer *base.Comparer = &base.Comparer{ + Compare: compare, + Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, + AbbreviatedKey: func(k []byte) uint64 { + return base.DefaultComparer.AbbreviatedKey(k[:split(k)]) + }, + FormatKey: base.DefaultFormatter, + Separator: func(dst, a, b []byte) []byte { + ai := split(a) + if ai == len(a) { + return append(dst, a...) + } + bi := split(b) + if bi == len(b) { + return append(dst, a...) + } + + // If the keys are the same just return a. + if bytes.Equal(a[:ai], b[:bi]) { + return append(dst, a...) + } + n := len(dst) + dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi]) + // Did it pick a separator different than a[:ai] -- if not we can't do better than a. + buf := dst[n:] + if bytes.Equal(a[:ai], buf) { + return append(dst[:n], a...) + } + // The separator is > a[:ai], so we only need to add the sentinel. + return append(dst, 0) + }, + Successor: func(dst, a []byte) []byte { + ai := split(a) + if ai == len(a) { + return append(dst, a...) + } + n := len(dst) + dst = base.DefaultComparer.Successor(dst, a[:ai]) + // Did it pick a successor different than a[:ai] -- if not we can't do better than a. + buf := dst[n:] + if bytes.Equal(a[:ai], buf) { + return append(dst[:n], a...) + } + // The successor is > a[:ai], so we only need to add the sentinel. 
+ return append(dst, 0) + }, + Split: split, + Name: "pebble.internal.testkeys", +} + +func compare(a, b []byte) int { + ai, bi := split(a), split(b) + if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { + return v + } + + if len(a[ai:]) == 0 { + if len(b[bi:]) == 0 { + return 0 + } + return -1 + } else if len(b[bi:]) == 0 { + return +1 + } + return compareTimestamps(a[ai:], b[bi:]) +} + +func split(a []byte) int { + i := bytes.LastIndex(a, []byte(suffixDelim)) + if i >= 0 { + return i + } + return len(a) +} + +func compareTimestamps(a, b []byte) int { + ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte(suffixDelim)), 10, 64) + if err != nil { + panic(fmt.Sprintf("invalid test mvcc timestamp %q", a)) + } + bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte(suffixDelim)), 10, 64) + if err != nil { + panic(fmt.Sprintf("invalid test mvcc timestamp %q", b)) + } + switch { + case ai < bi: + return +1 + case ai > bi: + return -1 + default: + return 0 + } +} + +// Keyspace describes a finite keyspace of unsuffixed test keys. +type Keyspace interface { + // Count returns the number of keys that exist within this keyspace. + Count() int + + // MaxLen returns the maximum length, in bytes, of a key within this + // keyspace. This is only guaranteed to return an upper bound. + MaxLen() int + + // Slice returns the sub-keyspace from index i, inclusive, to index j, + // exclusive. The receiver is unmodified. + Slice(i, j int) Keyspace + + // EveryN returns a key space that includes 1 key for every N keys in the + // original keyspace. The receiver is unmodified. + EveryN(n int) Keyspace + + key(buf []byte, i int) int +} + +// Alpha constructs a keyspace consisting of all keys containing characters a-z, +// with at most `maxLength` characters. +func Alpha(maxLength int) Keyspace { + return alphabet{ + alphabet: []byte(alpha), + maxLength: maxLength, + increment: 1, + } +} + +// KeyAt returns the i-th key within the keyspace with a suffix encoding the +// timestamp t. 
+func KeyAt(k Keyspace, i int, t int) []byte { + b := make([]byte, k.MaxLen()+maxSuffixLen) + return b[:WriteKeyAt(b, k, i, t)] +} + +// WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a +// suffix encoding the timestamp t suffix. It returns the number of bytes +// written. +func WriteKeyAt(dst []byte, k Keyspace, i int, t int) int { + n := WriteKey(dst, k, i) + n += WriteSuffix(dst[n:], t) + return n +} + +// Suffix returns the test keys suffix representation of timestamp t. +func Suffix(t int) []byte { + b := make([]byte, maxSuffixLen) + return b[:WriteSuffix(b, t)] +} + +// WriteSuffix writes the test keys suffix representation of timestamp t to dst, +// returning the number of bytes written. +func WriteSuffix(dst []byte, t int) int { + n := copy(dst, suffixDelim) + n += len(strconv.AppendInt(dst[n:n], int64(t), 10)) + return n +} + +// Key returns the i-th unsuffixed key within the keyspace. +func Key(k Keyspace, i int) []byte { + b := make([]byte, k.MaxLen()) + return b[:k.key(b, i)] +} + +// WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It +// returns the number of bytes written. +func WriteKey(dst []byte, k Keyspace, i int) int { + return k.key(dst, i) +} + +type alphabet struct { + alphabet []byte + maxLength int + headSkip int + tailSkip int + increment int +} + +func (a alphabet) Count() int { + return (keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip) / a.increment +} + +func (a alphabet) MaxLen() int { + return a.maxLength +} + +func (a alphabet) Slice(i, j int) Keyspace { + s := a + s.headSkip += i + s.tailSkip += a.Count() - j + return s +} + +func (a alphabet) EveryN(n int) Keyspace { + s := a + s.increment *= n + return s +} + +func keyCount(n, l int) int { + // The number of representable keys in the keyspace is a function of the + // length of the alphabet n and the max key length l. 
Consider how the + // number of representable keys grows as l increases: + // + // l = 1: n + // l = 2: n + n^2 + // l = 3: n + n^2 + n^3 + // ... + // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1) + return (n * (int(math.Pow(float64(n), float64(l))) - 1)) / (n - 1) +} + +func (a alphabet) key(buf []byte, idx int) int { + // This function generates keys of length 1..maxKeyLength, pulling + // characters from the alphabet. The idx determines which key to generate, + // generating the i-th lexicographically next key. + // + // The index to use is advanced by `headSkip`, allowing a keyspace to encode + // a subregion of the keyspace. + // + // Eg, alphabet = `ab`, maxKeyLength = 3: + // + // aaa aab aba abb baa bab bba bbb + // aa ab ba bb + // a b + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + // + return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip, + keyCount(len(a.alphabet), a.maxLength)) +} + +func generateAlphabetKey(buf, alphabet []byte, i, keyCount int) int { + if keyCount == 0 || i > keyCount || i < 0 { + return 0 + } + + // Of the keyCount keys in the generative keyspace, how many are there + // starting with a particular character? + keysPerCharacter := keyCount / len(alphabet) + + // Find the character that the key at index i starts with and set it. + characterIdx := i / keysPerCharacter + buf[0] = alphabet[characterIdx] + + // Consider characterIdx = 0, pointing to 'a'. + // + // aaa aab aba abb baa bab bba bbb + // aa ab ba bb + // a b + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + // \_________________________/ + // |keysPerCharacter| keys + // + // In our recursive call, we reduce the problem to: + // + // aaa aab aba abb + // aa ab + // 0 1 2 3 4 5 + // \________________________/ + // |keysPerCharacter-1| keys + // + // In the subproblem, there are keysPerCharacter-1 keys (eliminating the + // just 'a' key, plus any keys beginning with any other character). 
+ // + // The index i is also offset, reduced by the count of keys beginning with + // characters earlier in the alphabet (keysPerCharacter*characterIdx) and + // the key consisting of just the 'a' (-1). + i = i - keysPerCharacter*characterIdx - 1 + return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1) +} diff --git a/internal/testkeys/testkeys_test.go b/internal/testkeys/testkeys_test.go new file mode 100644 index 0000000000..43bd560f70 --- /dev/null +++ b/internal/testkeys/testkeys_test.go @@ -0,0 +1,167 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package testkeys + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGenerateAlphabetKey(t *testing.T) { + testCases := []struct { + alphabet string + i int + depth int + want string + }{ + {"abc", 0, 1, "a"}, + {"abc", 0, 2, "a"}, + {"abc", 0, 3, "a"}, + + {"abc", 1, 1, "b"}, + {"abc", 2, 1, "c"}, + + {"abc", 0, 2, "a"}, + {"abc", 1, 2, "aa"}, + {"abc", 2, 2, "ab"}, + {"abc", 3, 2, "ac"}, + {"abc", 4, 2, "b"}, + {"abc", 5, 2, "ba"}, + {"abc", 6, 2, "bb"}, + {"abc", 7, 2, "bc"}, + {"abc", 8, 2, "c"}, + {"abc", 9, 2, "ca"}, + {"abc", 10, 2, "cb"}, + {"abc", 11, 2, "cc"}, + } + + buf := make([]byte, 10) + for _, tc := range testCases { + kc := keyCount(len(tc.alphabet), tc.depth) + n := generateAlphabetKey(buf, []byte(tc.alphabet), tc.i, kc) + got := string(buf[:n]) + if got != tc.want { + t.Errorf("generateAlphabetKey(%q, %d, %d) = %q, want %q", tc.alphabet, tc.i, kc, got, tc.want) + } + } +} + +func TestKeyCount(t *testing.T) { + type params struct { + n, l int + } + testCases := map[params]int{ + {26, 1}: 26, + {52, 1}: 52, + {2, 2}: 6, + {2, 3}: 14, + {2, 4}: 30, + {3, 2}: 12, + } + for p, want := range testCases { + got := keyCount(p.n, p.l) + if got != want { + t.Errorf("keyCount(%d, %d) = %d, want %d", p.n, p.l, got, 
want) + } + } +} + +func TestFullKeyspaces(t *testing.T) { + testCases := []struct { + ks Keyspace + want string + }{ + { + Alpha(1), + "a b c d e f g h i j k l m n o p q r s t u v w x y z", + }, + { + alphabet{[]byte("abc"), 2, 0, 0, 1}, + "a aa ab ac b ba bb bc c ca cb cc", + }, + { + alphabet{[]byte("abc"), 2, 0, 0, 2}, + "a ab b bb c cb", + }, + { + alphabet{[]byte("abc"), 3, 0, 0, 1}, + "a aa aaa aab aac ab aba abb abc ac aca acb acc b ba baa bab bac bb bba bbb bbc bc bca bcb bcc c ca caa cab cac cb cba cbb cbc cc cca ccb ccc", + }, + { + alphabet{[]byte("abc"), 3, 7, 10, 1}, + "abb abc ac aca acb acc b ba baa bab bac bb bba bbb bbc bc bca bcb bcc c ca caa", + }, + } + for _, tc := range testCases { + require.Equal(t, tc.want, keyspaceToString(tc.ks)) + } +} + +func TestSlice(t *testing.T) { + testCases := []struct { + orig Keyspace + i, j int + want string + }{ + {Alpha(1), 1, 25, "b c d e f g h i j k l m n o p q r s t u v w x y"}, + {Alpha(1).Slice(1, 25), 1, 23, "c d e f g h i j k l m n o p q r s t u v w x"}, + {Alpha(1).Slice(1, 25).Slice(1, 23), 10, 22, "m n o p q r s t u v w x"}, + } + for _, tc := range testCases { + got := keyspaceToString(tc.orig.Slice(tc.i, tc.j)) + if got != tc.want { + t.Errorf("(%q).Slice(%d, %d) = %q, want %q", + keyspaceToString(tc.orig), tc.i, tc.j, got, tc.want) + } + } +} + +func TestSuffix(t *testing.T) { + ks := Alpha(3) + require.Equal(t, "a@t1", string(KeyAt(ks, 0, 1))) + require.Equal(t, "a@t10", string(KeyAt(ks, 0, 10))) + require.Equal(t, "aab@t5", string(KeyAt(ks, 3, 5))) + + assertCmp := func(want int, a, b []byte) { + got := Comparer.Compare(a, b) + if got != want { + t.Helper() + t.Errorf("Compare(%q, %q) = %d, want %d", a, b, got, want) + } + } + + for i := 1; i < ks.Count(); i++ { + assertCmp(-1, KeyAt(ks, i-1, 1), KeyAt(ks, i, 1)) + assertCmp(-1, Key(ks, i-1), Key(ks, i)) + assertCmp(0, Key(ks, i), Key(ks, i)) + for ts := 2; ts < 11; ts++ { + assertCmp(+1, KeyAt(ks, i, ts-1), KeyAt(ks, i, ts)) + assertCmp(-1, 
KeyAt(ks, i-1, ts-1), KeyAt(ks, i, ts)) + } + } + + // Suffixes should be comparable on their own too. + a, b := make([]byte, maxSuffixLen), make([]byte, maxSuffixLen) + for ts := 2; ts < 150; ts++ { + an := WriteSuffix(a, ts-1) + bn := WriteSuffix(b, ts) + assertCmp(+1, a[:an], b[:bn]) + } +} + +func keyspaceToString(ks Keyspace) string { + var buf bytes.Buffer + b := make([]byte, ks.MaxLen()) + for i := 0; i < ks.Count(); i++ { + n := ks.key(b, i) + if i > 0 { + buf.WriteRune(' ') + } + buf.Write(b[:n]) + } + return buf.String() +}