diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index cb610ef12b8b..b6e8ed9f9741 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -3838,21 +3838,38 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) { expectedSSTs = expectedSSTs[len(expectedSSTs)-1:] // Construct SSTs for the range-id local keys of the subsumed replicas. - // with RangeIDs 3 and 4. + // with RangeIDs 3 and 4. Note that this also targets the unreplicated + // rangeID-based keys because we're effectively replicaGC'ing these + // replicas (while absorbing their user keys into the LHS). for _, k := range []roachpb.Key{keyB, keyC} { rangeID := rangeIds[string(k)] sstFile := &storage.MemFile{} sst := storage.MakeIngestionSSTWriter(ctx, st, sstFile) defer sst.Close() - s := rditer.MakeRangeIDLocalKeySpan(rangeID, false /* replicatedOnly */) - // The snapshot code will use ClearRangeWithHeuristic with a threshold of - // 1 to clear the range, but this will truncate the range tombstone to the - // first key. In this case, the first key is RangeGCThresholdKey, which - // doesn't yet exist in the engine, so we write the Pebble range tombstone - // manually. - if err := sst.ClearRawRange(keys.RangeGCThresholdKey(rangeID), s.EndKey, true, false); err != nil { - return err + { + // The snapshot code will use ClearRangeWithHeuristic with a threshold of + // 1 to clear the range, but this will truncate the range tombstone to the + // first key. In this case, the first key is RangeGCThresholdKey, which + // doesn't yet exist in the engine, so we write the Pebble range tombstone + // manually. + sl := rditer.Select(rangeID, rditer.SelectOpts{ + ReplicatedByRangeID: true, + }) + require.Len(t, sl, 1) + s := sl[0] + require.NoError(t, sst.ClearRawRange(keys.RangeGCThresholdKey(rangeID), s.EndKey, true, false)) + } + { + // Ditto for the unreplicated version, where the first key happens to be + // the HardState. + sl := rditer.Select(rangeID, rditer.SelectOpts{ + UnreplicatedByRangeID: true, + }) + require.Len(t, sl, 1) + s := sl[0] + require.NoError(t, sst.ClearRawRange(keys.RaftHardStateKey(rangeID), s.EndKey, true, false)) } + tombstoneKey := keys.RangeTombstoneKey(rangeID) tombstoneValue := &roachpb.RangeTombstone{NextReplicaID: math.MaxInt32} if err := storage.MVCCBlindPutProto( @@ -3875,8 +3892,9 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) { StartKey: roachpb.RKey(keyD), EndKey: roachpb.RKey(keyEnd), } - s := rditer.MakeUserKeySpan(&desc) - if err := storage.ClearRangeWithHeuristic(receivingEng, &sst, s.Key, s.EndKey, 64, 8); err != nil { + if err := storage.ClearRangeWithHeuristic( + receivingEng, &sst, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), 64, 8, + ); err != nil { return err } if err = sst.Finish(); err != nil { diff --git a/pkg/kv/kvserver/consistency_queue_test.go b/pkg/kv/kvserver/consistency_queue_test.go index ce2fc88578c5..e3da77f39afd 100644 --- a/pkg/kv/kvserver/consistency_queue_test.go +++ b/pkg/kv/kvserver/consistency_queue_test.go @@ -21,6 +21,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" @@ -93,15 +94,16 @@ func TestConsistencyQueueRequiresLive(t *testing.T) { } // TestCheckConsistencyMultiStore creates a node with three stores -// with three way replication. A value is added to the node, and a -// consistency check is run. +// with three way replication. A consistency check is run on r1, +// which always has some data, and on a newly split off range +// as well. func TestCheckConsistencyMultiStore(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ - ReplicationMode: base.ReplicationAuto, + ReplicationMode: base.ReplicationManual, ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ Store: &kvserver.StoreTestingKnobs{ @@ -115,24 +117,38 @@ func TestCheckConsistencyMultiStore(t *testing.T) { defer tc.Stopper().Stop(context.Background()) store := tc.GetFirstStoreFromServer(t, 0) - // Write something to the DB. - putArgs := putArgs([]byte("a"), []byte("b")) - if _, err := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), putArgs); err != nil { - t.Fatal(err) + // Split off a range and write to it; we'll use it below but we test r1 first. + k2 := tc.ScratchRange(t) + { + // Write something to k2. + putArgs := putArgs(k2, []byte("b")) + _, pErr := kv.SendWrapped(context.Background(), store.DB().NonTransactionalSender(), putArgs) + require.NoError(t, pErr.GoError()) } - // Run consistency check. - checkArgs := roachpb.CheckConsistencyRequest{ - RequestHeader: roachpb.RequestHeader{ - // span of keys that include "a". - Key: []byte("a"), - EndKey: []byte("aa"), - }, - } - if _, err := kv.SendWrappedWith(context.Background(), store.DB().NonTransactionalSender(), roachpb.Header{ - Timestamp: store.Clock().Now(), - }, &checkArgs); err != nil { - t.Fatal(err) + // 3x replicate our ranges. + // + // Depending on what we're doing, we need to use LocalMax or [R]KeyMin + // for r1 due to the issue below. + // + // See: https://github.com/cockroachdb/cockroach/issues/95055 + tc.AddVotersOrFatal(t, roachpb.KeyMin, tc.Targets(1, 2)...) + tc.AddVotersOrFatal(t, k2, tc.Targets(1, 2)...) + + // Run consistency check on r1 and ScratchRange. + for _, k := range []roachpb.Key{keys.LocalMax, k2} { + t.Run(k.String(), func(t *testing.T) { + checkArgs := roachpb.CheckConsistencyRequest{ + RequestHeader: roachpb.RequestHeader{ + Key: keys.LocalMax, + EndKey: keys.LocalMax.Next(), + }, + } + _, pErr := kv.SendWrappedWith(context.Background(), store.DB().NonTransactionalSender(), roachpb.Header{ + Timestamp: store.Clock().Now(), + }, &checkArgs) + require.NoError(t, pErr.GoError()) + }) } } diff --git a/pkg/kv/kvserver/rditer/BUILD.bazel b/pkg/kv/kvserver/rditer/BUILD.bazel index 5a917a820199..b106d2d18486 100644 --- a/pkg/kv/kvserver/rditer/BUILD.bazel +++ b/pkg/kv/kvserver/rditer/BUILD.bazel @@ -5,6 +5,7 @@ go_library( name = "rditer", srcs = [ "replica_data_iter.go", + "select.go", "stats.go", ], importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer", @@ -21,21 +22,30 @@ go_library( go_test( name = "rditer_test", size = "small", - srcs = ["replica_data_iter_test.go"], + srcs = [ + "replica_data_iter_test.go", + "select_test.go", + ], args = ["-test.timeout=55s"], + data = glob(["testdata/**"]), embed = [":rditer"], deps = [ "//pkg/keys", + "//pkg/kv/kvserver/concurrency/lock", "//pkg/kv/kvserver/spanset", "//pkg/roachpb", "//pkg/settings/cluster", "//pkg/storage", "//pkg/testutils", + "//pkg/testutils/datapathutils", + "//pkg/testutils/echotest", "//pkg/testutils/skip", "//pkg/util/hlc", "//pkg/util/leaktest", "//pkg/util/randutil", "//pkg/util/uuid", + "@com_github_olekukonko_tablewriter//:tablewriter", + "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/kv/kvserver/rditer/replica_data_iter.go b/pkg/kv/kvserver/rditer/replica_data_iter.go index 07887ca6bdbd..942c543e7b1d 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter.go @@ -54,104 +54,69 @@ type ReplicaMVCCDataIterator struct { err error } -// MakeAllKeySpans returns all key spans for the given Range, in +// makeAllKeySpans returns all key spans for the given Range, in // sorted order. -func MakeAllKeySpans(d *roachpb.RangeDescriptor) []roachpb.Span { - return makeRangeKeySpans(d, false /* replicatedOnly */) +func makeAllKeySpans(d *roachpb.RangeDescriptor) []roachpb.Span { + return Select(d.RangeID, SelectOpts{ + ReplicatedBySpan: d.RSpan(), + ReplicatedByRangeID: true, + UnreplicatedByRangeID: true, + }) } // MakeReplicatedKeySpans returns all key spans that are fully Raft -// replicated for the given Range. -// -// NOTE: The logic for receiving snapshot relies on this function returning the -// spans in the following sorted order: +// replicated for the given Range, in lexicographically sorted order: // // 1. Replicated range-id local key span. -// 2. Range-local key span. +// 2. "Local" key span (range descriptor, etc) // 3. Lock-table key spans. // 4. User key span. func MakeReplicatedKeySpans(d *roachpb.RangeDescriptor) []roachpb.Span { - return makeRangeKeySpans(d, true /* replicatedOnly */) -} - -func makeRangeKeySpans(d *roachpb.RangeDescriptor, replicatedOnly bool) []roachpb.Span { - rangeIDLocal := MakeRangeIDLocalKeySpan(d.RangeID, replicatedOnly) - rangeLocal := makeRangeLocalKeySpan(d) - rangeLockTable := makeRangeLockTableKeySpans(d) - user := MakeUserKeySpan(d) - ranges := make([]roachpb.Span, 5) - ranges[0] = rangeIDLocal - ranges[1] = rangeLocal - if len(rangeLockTable) != 2 { - panic("unexpected number of lock table key spans") - } - ranges[2] = rangeLockTable[0] - ranges[3] = rangeLockTable[1] - ranges[4] = user - return ranges + return Select(d.RangeID, SelectOpts{ + ReplicatedBySpan: d.RSpan(), + ReplicatedByRangeID: true, + }) } -// MakeReplicatedKeySpansExceptLockTable returns all key spans that are fully Raft +// makeReplicatedKeySpansExceptLockTable returns all key spans that are fully Raft // replicated for the given Range, except for the lock table spans. These are // returned in the following sorted order: // 1. Replicated range-id local key span. // 2. Range-local key span. // 3. User key span. -func MakeReplicatedKeySpansExceptLockTable(d *roachpb.RangeDescriptor) []roachpb.Span { +func makeReplicatedKeySpansExceptLockTable(d *roachpb.RangeDescriptor) []roachpb.Span { return []roachpb.Span{ - MakeRangeIDLocalKeySpan(d.RangeID, true /* replicatedOnly */), - makeRangeLocalKeySpan(d), - MakeUserKeySpan(d), + makeRangeIDReplicatedSpan(d.RangeID), + makeRangeLocalKeySpan(d.RSpan()), + d.KeySpan().AsRawSpanWithNoLocals(), } } -// MakeReplicatedKeySpansExcludingUserAndLockTable returns all key spans that are fully Raft +// makeReplicatedKeySpansExcludingUserAndLockTable returns all key spans that are fully Raft // replicated for the given Range, except for the lock table spans and user key span. // These are returned in the following sorted order: // 1. Replicated range-id local key span. // 2. Range-local key span. -func MakeReplicatedKeySpansExcludingUserAndLockTable(d *roachpb.RangeDescriptor) []roachpb.Span { +func makeReplicatedKeySpansExcludingUserAndLockTable(d *roachpb.RangeDescriptor) []roachpb.Span { return []roachpb.Span{ - MakeRangeIDLocalKeySpan(d.RangeID, true /* replicatedOnly */), - makeRangeLocalKeySpan(d), + makeRangeIDReplicatedSpan(d.RangeID), + makeRangeLocalKeySpan(d.RSpan()), } } -// MakeReplicatedKeySpansExceptRangeID returns all key spans that are fully Raft -// replicated for the given Range, except for the replicated range-id local key span. -// These are returned in the following sorted order: -// 1. Range-local key span. -// 2. Lock-table key spans. -// 3. User key span. -func MakeReplicatedKeySpansExceptRangeID(d *roachpb.RangeDescriptor) []roachpb.Span { - rangeLocal := makeRangeLocalKeySpan(d) - rangeLockTable := makeRangeLockTableKeySpans(d) - user := MakeUserKeySpan(d) - ranges := make([]roachpb.Span, 4) - ranges[0] = rangeLocal - if len(rangeLockTable) != 2 { - panic("unexpected number of lock table key spans") +func makeRangeIDReplicatedSpan(rangeID roachpb.RangeID) roachpb.Span { + prefix := keys.MakeRangeIDReplicatedPrefix(rangeID) + return roachpb.Span{ + Key: prefix, + EndKey: prefix.PrefixEnd(), } - ranges[1] = rangeLockTable[0] - ranges[2] = rangeLockTable[1] - ranges[3] = user - return ranges } -// MakeRangeIDLocalKeySpan returns the range-id local key span. If -// replicatedOnly is true, then it returns only the replicated keys, otherwise, -// it only returns both the replicated and unreplicated keys. -func MakeRangeIDLocalKeySpan(rangeID roachpb.RangeID, replicatedOnly bool) roachpb.Span { - var prefixFn func(roachpb.RangeID) roachpb.Key - if replicatedOnly { - prefixFn = keys.MakeRangeIDReplicatedPrefix - } else { - prefixFn = keys.MakeRangeIDPrefix - } - sysRangeIDKey := prefixFn(rangeID) +func makeRangeIDUnreplicatedSpan(rangeID roachpb.RangeID) roachpb.Span { + prefix := keys.MakeRangeIDUnreplicatedPrefix(rangeID) return roachpb.Span{ - Key: sysRangeIDKey, - EndKey: sysRangeIDKey.PrefixEnd(), + Key: prefix, + EndKey: prefix.PrefixEnd(), } } @@ -159,47 +124,10 @@ func MakeRangeIDLocalKeySpan(rangeID roachpb.RangeID, replicatedOnly bool) roach // are replicated keys that do not belong to the span they would naturally // sort into. For example, /Local/Range/Table/1 would sort into [/Min, // /System), but it actually belongs to [/Table/1, /Table/2). -func makeRangeLocalKeySpan(d *roachpb.RangeDescriptor) roachpb.Span { - return roachpb.Span{ - Key: keys.MakeRangeKeyPrefix(d.StartKey), - EndKey: keys.MakeRangeKeyPrefix(d.EndKey), - } -} - -// makeRangeLockTableKeySpans returns the 2 lock table key spans. -func makeRangeLockTableKeySpans(d *roachpb.RangeDescriptor) [2]roachpb.Span { - // Handle doubly-local lock table keys since range descriptor key - // is a range local key that can have a replicated lock acquired on it. - startRangeLocal, _ := keys.LockTableSingleKey(keys.MakeRangeKeyPrefix(d.StartKey), nil) - endRangeLocal, _ := keys.LockTableSingleKey(keys.MakeRangeKeyPrefix(d.EndKey), nil) - // The first range in the global keyspace can start earlier than LocalMax, - // at RKeyMin, but the actual data starts at LocalMax. We need to make this - // adjustment here to prevent [startRangeLocal, endRangeLocal) and - // [startGlobal, endGlobal) from overlapping. - globalStartKey := d.StartKey.AsRawKey() - if d.StartKey.Equal(roachpb.RKeyMin) { - globalStartKey = keys.LocalMax - } - startGlobal, _ := keys.LockTableSingleKey(globalStartKey, nil) - endGlobal, _ := keys.LockTableSingleKey(roachpb.Key(d.EndKey), nil) - return [2]roachpb.Span{ - { - Key: startRangeLocal, - EndKey: endRangeLocal, - }, - { - Key: startGlobal, - EndKey: endGlobal, - }, - } -} - -// MakeUserKeySpan returns the user key span. -func MakeUserKeySpan(d *roachpb.RangeDescriptor) roachpb.Span { - userKeys := d.KeySpan() +func makeRangeLocalKeySpan(sp roachpb.RSpan) roachpb.Span { return roachpb.Span{ - Key: userKeys.Key.AsRawKey(), - EndKey: userKeys.EndKey.AsRawKey(), + Key: keys.MakeRangeKeyPrefix(sp.Key), + EndKey: keys.MakeRangeKeyPrefix(sp.EndKey), } } @@ -224,9 +152,9 @@ func NewReplicaMVCCDataIterator( if !reader.ConsistentIterators() { panic("ReplicaMVCCDataIterator needs a Reader that provides ConsistentIterators") } - spans := MakeReplicatedKeySpansExceptLockTable(d) + spans := makeReplicatedKeySpansExceptLockTable(d) if opts.ExcludeUserKeySpan { - spans = MakeReplicatedKeySpansExcludingUserAndLockTable(d) + spans = makeReplicatedKeySpansExcludingUserAndLockTable(d) } ri := &ReplicaMVCCDataIterator{ ReplicaDataIteratorOptions: opts, @@ -377,7 +305,7 @@ func (ri *ReplicaMVCCDataIterator) HasPointAndRange() (bool, bool) { // IterateReplicaKeySpans iterates over each of a range's key spans, and calls // the given visitor with an iterator over its data. Specifically, it iterates -// over the spans returned by either MakeAllKeySpans or MakeReplicatedKeySpans, +// over the spans returned by either makeAllKeySpans or MakeReplicatedKeySpans, // and for each one provides first a point key iterator and then a range key // iterator. This is the expected order for Raft snapshots. // @@ -401,7 +329,7 @@ func IterateReplicaKeySpans( if replicatedOnly { spans = MakeReplicatedKeySpans(desc) } else { - spans = MakeAllKeySpans(desc) + spans = makeAllKeySpans(desc) } keyTypes := []storage.IterKeyType{storage.IterKeyTypePointsOnly, storage.IterKeyTypeRangesOnly} for _, span := range spans { @@ -449,9 +377,9 @@ func IterateMVCCReplicaKeySpans( if !reader.ConsistentIterators() { panic("reader must provide consistent iterators") } - spans := MakeReplicatedKeySpansExceptLockTable(desc) + spans := makeReplicatedKeySpansExceptLockTable(desc) if options.ExcludeUserKeySpan { - spans = MakeReplicatedKeySpansExcludingUserAndLockTable(desc) + spans = makeReplicatedKeySpansExcludingUserAndLockTable(desc) } if options.Reverse { spanMax := len(spans) - 1 diff --git a/pkg/kv/kvserver/rditer/replica_data_iter_test.go b/pkg/kv/kvserver/rditer/replica_data_iter_test.go index e89f750d0a5f..317cc9ecaff1 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter_test.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter_test.go @@ -11,23 +11,28 @@ package rditer import ( - "bytes" "context" "encoding/binary" "fmt" + "path/filepath" + "strings" "testing" "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" + "github.com/cockroachdb/cockroach/pkg/testutils/echotest" "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" + "github.com/olekukonko/tablewriter" "github.com/stretchr/testify/require" ) @@ -44,11 +49,9 @@ func uuidFromString(input string) uuid.UUID { // an ordered mix of MVCCKey and MVCCRangeKey with: // - the encoded keys of all created data. // - the subset of the encoded keys that are replicated keys. -// -// TODO(sumeer): add lock table and corrsponding MVCC keys. func createRangeData( t *testing.T, eng storage.Engine, desc roachpb.RangeDescriptor, -) ([]interface{}, []interface{}) { +) ([]storage.MVCCKey, []storage.MVCCRangeKey) { ctx := context.Background() unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID) @@ -56,153 +59,84 @@ func createRangeData( testTxnID := uuidFromString("0ce61c17-5eb4-4587-8c36-dcf4062ada4c") testTxnID2 := uuidFromString("9855a1ef-8eb9-4c06-a106-cab1dda78a2b") + testTxnID3 := uuidFromString("295e727c-8ca9-437c-bb5e-8e2ebbad996f") value := roachpb.MakeValueFromString("value") ts0 := hlc.Timestamp{} ts := hlc.Timestamp{WallTime: 1} localTS := hlc.ClockTimestamp{} - allKeys := []interface{}{ + var ps []storage.MVCCKey + var rs []storage.MVCCRangeKey + + ps = append(ps, + // StateMachine (i.e. replicated) keys that are keyed by RangeID. storage.MVCCKey{Key: keys.AbortSpanKey(desc.RangeID, testTxnID), Timestamp: ts0}, storage.MVCCKey{Key: keys.AbortSpanKey(desc.RangeID, testTxnID2), Timestamp: ts0}, storage.MVCCKey{Key: keys.RangeGCThresholdKey(desc.RangeID), Timestamp: ts0}, storage.MVCCKey{Key: keys.RangeAppliedStateKey(desc.RangeID), Timestamp: ts0}, storage.MVCCKey{Key: keys.RangeLeaseKey(desc.RangeID), Timestamp: ts0}, + ) + rs = append(rs, storage.MVCCRangeKey{ // emitted last because we emit all point keys before range keys + StartKey: append(replicatedPrefix.Clone(), []byte(":a")...), + EndKey: append(replicatedPrefix.Clone(), []byte(":x")...), + Timestamp: ts, + }) + + ps = append(ps, + // Non-StateMachine (i.e. unreplicated) keys that are keyed by RangeID. storage.MVCCKey{Key: keys.RangeTombstoneKey(desc.RangeID), Timestamp: ts0}, storage.MVCCKey{Key: keys.RaftHardStateKey(desc.RangeID), Timestamp: ts0}, storage.MVCCKey{Key: keys.RaftLogKey(desc.RangeID, 1), Timestamp: ts0}, storage.MVCCKey{Key: keys.RaftLogKey(desc.RangeID, 2), Timestamp: ts0}, storage.MVCCKey{Key: keys.RangeLastReplicaGCTimestampKey(desc.RangeID), Timestamp: ts0}, - storage.MVCCRangeKey{ - StartKey: append(replicatedPrefix.Clone(), []byte(":a")...), - EndKey: append(replicatedPrefix.Clone(), []byte(":x")...), - Timestamp: ts, - }, - storage.MVCCRangeKey{ - StartKey: append(unreplicatedPrefix.Clone(), []byte(":a")...), - EndKey: append(unreplicatedPrefix.Clone(), []byte(":x")...), - Timestamp: ts, - }, - storage.MVCCKey{Key: keys.RangeDescriptorKey(desc.StartKey), Timestamp: ts}, - storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.StartKey), uuid.MakeV4()), Timestamp: ts0}, - storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.StartKey.Next()), uuid.MakeV4()), Timestamp: ts0}, - storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.EndKey).Prevish(100), uuid.MakeV4()), Timestamp: ts0}, - // TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space. - // Once we have resolved https://github.com/cockroachdb/cockroach/issues/437, - // replace this with something that reliably generates the first valid key in the range. - //{r.Desc().StartKey.Next(), ts}, - // The following line is similar to StartKey.Next() but adds more to the key to - // avoid falling into the system-local space. - storage.MVCCKey{Key: append(desc.StartKey.AsRawKey().Clone(), '\x02'), Timestamp: ts}, - storage.MVCCKey{Key: roachpb.Key(desc.EndKey).Prevish(100), Timestamp: ts}, - storage.MVCCRangeKey{ - StartKey: desc.StartKey.AsRawKey().Clone(), - EndKey: desc.EndKey.AsRawKey().Clone(), - Timestamp: ts, - }, - } + ) + rs = append(rs, storage.MVCCRangeKey{ // emitted last because we emit all point keys before range keys + StartKey: append(unreplicatedPrefix.Clone(), []byte(":a")...), + EndKey: append(unreplicatedPrefix.Clone(), []byte(":x")...), + Timestamp: ts, + }) - var replicatedKeys []interface{} - for _, keyI := range allKeys { - switch key := keyI.(type) { - case storage.MVCCKey: - require.NoError(t, storage.MVCCPut(ctx, eng, nil, key.Key, key.Timestamp, localTS, value, nil)) - if !bytes.HasPrefix(key.Key, unreplicatedPrefix) { - replicatedKeys = append(replicatedKeys, key) - } - case storage.MVCCRangeKey: - require.NoError(t, eng.PutMVCCRangeKey(key, storage.MVCCValue{})) - if !bytes.HasPrefix(key.StartKey, unreplicatedPrefix) { - replicatedKeys = append(replicatedKeys, key) - } - } + ps = append(ps, + // Replicated system keys: range descriptor, txns, locks, user keys. + storage.MVCCKey{Key: keys.RangeDescriptorKey(desc.StartKey), Timestamp: ts}, // call this [1], referenced by locks + storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.StartKey), testTxnID), Timestamp: ts0}, + storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.StartKey.Next()), testTxnID2), Timestamp: ts0}, + storage.MVCCKey{Key: keys.TransactionKey(roachpb.Key(desc.EndKey).Prevish(5), testTxnID3), Timestamp: ts0}, + storage.MVCCKey{Key: desc.StartKey.AsRawKey(), Timestamp: ts}, // call this [2], referenced by locks + storage.MVCCKey{Key: roachpb.Key(desc.EndKey).Prevish(5), Timestamp: ts}, + ) + + locks := []storage.LockTableKey{ + { + Key: keys.RangeDescriptorKey(desc.StartKey), // mark [1] above as intent + Strength: lock.Exclusive, + TxnUUID: testTxnID.GetBytes(), + }, { + Key: desc.StartKey.AsRawKey(), // mark [2] above as intent + Strength: lock.Exclusive, + TxnUUID: testTxnID.GetBytes(), + }, } - return allKeys, replicatedKeys -} + rs = append(rs, storage.MVCCRangeKey{ // emitted last because we emit all point keys before range keys + StartKey: desc.StartKey.AsRawKey().Clone(), + EndKey: desc.EndKey.AsRawKey().Clone(), + Timestamp: ts, + }) -func verifyRDReplicatedOnlyMVCCIter( - t *testing.T, - desc *roachpb.RangeDescriptor, - eng storage.Engine, - expectedKeys []storage.MVCCKey, - expectedRangeKeys []storage.MVCCRangeKey, -) { - t.Helper() - verify := func(t *testing.T, useSpanSet, reverse bool) { - readWriter := eng.NewReadOnly(storage.StandardDurability) - defer readWriter.Close() - if useSpanSet { - var spans spanset.SpanSet - spans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ - Key: keys.MakeRangeIDPrefix(desc.RangeID), - EndKey: keys.MakeRangeIDPrefix(desc.RangeID).PrefixEnd(), - }) - spans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ - Key: keys.MakeRangeKeyPrefix(desc.StartKey), - EndKey: keys.MakeRangeKeyPrefix(desc.EndKey), - }) - spans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{ - Key: desc.StartKey.AsRawKey(), - EndKey: desc.EndKey.AsRawKey(), - }, hlc.Timestamp{WallTime: 42}) - readWriter = spanset.NewReadWriterAt(readWriter, &spans, hlc.Timestamp{WallTime: 42}) - } - var rangeStart roachpb.Key - actualKeys := []storage.MVCCKey{} - actualRanges := []storage.MVCCRangeKey{} - err := IterateMVCCReplicaKeySpans(desc, readWriter, IterateOptions{ - CombineRangesAndPoints: false, - Reverse: reverse, - }, func(iter storage.MVCCIterator, span roachpb.Span, keyType storage.IterKeyType) error { - for { - ok, err := iter.Valid() - require.NoError(t, err) - if !ok { - break - } - p, r := iter.HasPointAndRange() - if p { - if !reverse { - actualKeys = append(actualKeys, iter.Key()) - } else { - actualKeys = append([]storage.MVCCKey{iter.Key()}, actualKeys...) - } - } - if r { - rangeKeys := iter.RangeKeys().Clone() - if !rangeKeys.Bounds.Key.Equal(rangeStart) { - rangeStart = rangeKeys.Bounds.Key - if !reverse { - for _, v := range rangeKeys.Versions { - actualRanges = append(actualRanges, rangeKeys.AsRangeKey(v)) - } - } else { - for i := rangeKeys.Len() - 1; i >= 0; i-- { - actualRanges = append([]storage.MVCCRangeKey{ - rangeKeys.AsRangeKey(rangeKeys.Versions[i]), - }, actualRanges...) - } - } - } - } - if reverse { - iter.Prev() - } else { - iter.Next() - } - } - return nil - }) - require.NoError(t, err, "visitor failed") - require.Equal(t, expectedKeys, actualKeys) - require.Equal(t, expectedRangeKeys, actualRanges) + for _, pk := range ps { + require.NoError(t, storage.MVCCPut(ctx, eng, nil, pk.Key, pk.Timestamp, localTS, value, nil)) } - testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) { - testutils.RunTrueAndFalse(t, "spanset", func(t *testing.T, useSpanSet bool) { - verify(t, useSpanSet, reverse) - }) - }) + for _, rk := range rs { + require.NoError(t, eng.PutMVCCRangeKey(rk, storage.MVCCValue{})) + } + for _, l := range locks { + sl, _ := l.ToEngineKey(nil) + require.NoError(t, eng.PutEngineKey(sl, []byte("fake lock"))) + } + + return ps, rs } // verifyIterateReplicaKeySpans verifies that IterateReplicaKeySpans returns the @@ -210,15 +144,23 @@ func verifyRDReplicatedOnlyMVCCIter( // or MVCCRangeKey. func verifyIterateReplicaKeySpans( t *testing.T, + tbl *tablewriter.Table, desc *roachpb.RangeDescriptor, eng storage.Engine, replicatedOnly bool, - expectedKeys []interface{}, ) { readWriter := eng.NewSnapshot() defer readWriter.Close() - actualKeys := []interface{}{} + tbl.SetAlignment(tablewriter.ALIGN_LEFT) + tbl.SetHeader([]string{ + "span", + "key_hex", + "endKey_hex", + "version_hex", + "pretty", + }) + require.NoError(t, IterateReplicaKeySpans(desc, readWriter, replicatedOnly, func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error { var err error @@ -226,17 +168,32 @@ func verifyIterateReplicaKeySpans( // Span should not be empty. require.NotZero(t, span) - // Key should be in the given span. key, err := iter.UnsafeEngineKey() require.NoError(t, err) - require.True(t, key.IsMVCCKey()) require.True(t, span.ContainsKey(key.Key), "%s not in %s", key, span) + require.True(t, key.IsLockTableKey() || key.IsMVCCKey(), "%s neither lock nor MVCC", key) switch keyType { case storage.IterKeyTypePointsOnly: - mvccKey, err := key.ToMVCCKey() - require.NoError(t, err) - actualKeys = append(actualKeys, mvccKey.Clone()) + var mvccKey storage.MVCCKey + if key.IsMVCCKey() { + var err error + mvccKey, err = key.ToMVCCKey() + require.NoError(t, err) + } else { // lock key + ltk, err := key.ToLockTableKey() + require.NoError(t, err) + mvccKey = storage.MVCCKey{ + Key: ltk.Key, + } + } + tbl.Append([]string{ + span.String(), + fmt.Sprintf("%x", key.Key), + "", + fmt.Sprintf("%x", key.Version), + mvccKey.String(), + }) case storage.IterKeyTypeRangesOnly: bounds, err := iter.EngineRangeBounds() @@ -245,10 +202,17 @@ func verifyIterateReplicaKeySpans( for _, rk := range iter.EngineRangeKeys() { ts, err := storage.DecodeMVCCTimestampSuffix(rk.Version) require.NoError(t, err) - actualKeys = append(actualKeys, storage.MVCCRangeKey{ + mvccRangeKey := storage.MVCCRangeKey{ StartKey: bounds.Key.Clone(), EndKey: bounds.EndKey.Clone(), Timestamp: ts, + } + tbl.Append([]string{ + span.String(), + fmt.Sprintf("%x", bounds.Key), + fmt.Sprintf("%x", bounds.EndKey), + fmt.Sprintf("%x", rk.Version), + mvccRangeKey.String(), }) } @@ -258,26 +222,6 @@ func verifyIterateReplicaKeySpans( } return err })) - require.Equal(t, expectedKeys, actualKeys) -} - -// TestReplicaDataIterator verifies correct operation of iterator if -// a range contains no data and never has. -func TestReplicaDataIteratorEmptyRange(t *testing.T) { - defer leaktest.AfterTest(t)() - - eng := storage.NewDefaultInMemForTesting() - defer eng.Close() - - desc := &roachpb.RangeDescriptor{ - RangeID: 12345, - StartKey: roachpb.RKey("a"), - EndKey: roachpb.RKey("z"), - } - - verifyRDReplicatedOnlyMVCCIter(t, desc, eng, []storage.MVCCKey{}, []storage.MVCCRangeKey{}) - verifyIterateReplicaKeySpans(t, desc, eng, false, []interface{}{}) - verifyIterateReplicaKeySpans(t, desc, eng, true, []interface{}{}) } // TestReplicaDataIterator creates three ranges (a-b, b-c, c-d) and fills each @@ -289,59 +233,163 @@ func TestReplicaDataIterator(t *testing.T) { eng := storage.NewDefaultInMemForTesting() defer eng.Close() - descs := []roachpb.RangeDescriptor{ + // Create test cases with test data for each descriptor. + testcases := []struct { + desc roachpb.RangeDescriptor + }{ + // TODO(tbg): add first and last range test here. { - RangeID: 1, - StartKey: roachpb.RKey("a"), - EndKey: roachpb.RKey("b"), + desc: roachpb.RangeDescriptor{ + RangeID: 1, + StartKey: roachpb.RKey("a"), + EndKey: roachpb.RKey("b"), + }, }, { - RangeID: 2, - StartKey: roachpb.RKey("b"), - EndKey: roachpb.RKey("c"), + desc: roachpb.RangeDescriptor{ + RangeID: 2, + StartKey: roachpb.RKey("b"), + EndKey: roachpb.RKey("c"), + }, }, { - RangeID: 3, - StartKey: roachpb.RKey("c"), - EndKey: roachpb.RKey("d"), + desc: roachpb.RangeDescriptor{ + RangeID: 3, + StartKey: roachpb.RKey("c"), + EndKey: roachpb.RKey("d"), + }, }, } - // Create test cases with test data for each descriptor. - testcases := make([]struct { - desc roachpb.RangeDescriptor - allKeys, replicatedKeys []interface{} // mixed MVCCKey and MVCCRangeKey - }, len(descs)) - for i := range testcases { - testcases[i].desc = descs[i] - testcases[i].allKeys, testcases[i].replicatedKeys = createRangeData(t, eng, descs[i]) + createRangeData(t, eng, testcases[i].desc) } // Run tests. + path := datapathutils.TestDataPath(t, t.Name()) for _, tc := range testcases { - t.Run(tc.desc.RSpan().String(), func(t *testing.T) { - - // Verify the replicated and unreplicated engine contents. - verifyIterateReplicaKeySpans(t, &tc.desc, eng, false, tc.allKeys) - verifyIterateReplicaKeySpans(t, &tc.desc, eng, true, tc.replicatedKeys) - - // Verify the replicated MVCC contents. - var pointKeys []storage.MVCCKey - var rangeKeys []storage.MVCCRangeKey - for _, key := range tc.replicatedKeys { - if pointKey, ok := key.(storage.MVCCKey); ok { - pointKeys = append(pointKeys, pointKey) - } else if rangeKey, ok := key.(storage.MVCCRangeKey); ok { - // TODO(oleg): This is very naive and only works if keys don't overlap. - // Needs some thinking on how could we use this if ranges become - // fragmented. - rangeKeys = append(rangeKeys, rangeKey) + parName := fmt.Sprintf("r%d", tc.desc.RangeID) + t.Run(parName, func(t *testing.T) { + testutils.RunTrueAndFalse(t, "replicatedOnly", func(t *testing.T, replicatedOnly bool) { + name := "all" + if replicatedOnly { + name = "replicatedOnly" + } + w := echotest.NewWalker(t, filepath.Join(path, parName, name)) + + w.Run(t, "output", func(t *testing.T) string { + var innerBuf strings.Builder + tbl := tablewriter.NewWriter(&innerBuf) + // Print contents of the Replica according to the iterator. + verifyIterateReplicaKeySpans(t, tbl, &tc.desc, eng, replicatedOnly) + + tbl.Render() + return innerBuf.String() + })(t) + }) + }) + } +} + +func TestIterateMVCCReplicaKeySpansSpansSet(t *testing.T) { + defer leaktest.AfterTest(t)() + + eng := storage.NewDefaultInMemForTesting() + defer eng.Close() + + desc := roachpb.RangeDescriptor{ + RangeID: 1, + StartKey: roachpb.RKey("a"), + EndKey: roachpb.RKey("b"), + } + + createRangeData(t, eng, desc) + + // Verify that we're getting identical replicated MVCC contents across + // multiple ways to scan. We already have TestReplicaDataIterator above + // to show that the output is sane so here we only have to verify every + // way to scan gives the same result. + // + // TODO(oleg): This is very naive and only works if keys don't overlap. + // Needs some thinking on how could we use this if ranges become + // fragmented. + // + get := func(t *testing.T, useSpanSet, reverse bool) ([]storage.MVCCKey, []storage.MVCCRangeKey) { + readWriter := eng.NewReadOnly(storage.StandardDurability) + defer readWriter.Close() + if useSpanSet { + var spans spanset.SpanSet + spans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefix(desc.RangeID), + EndKey: keys.MakeRangeIDPrefix(desc.RangeID).PrefixEnd(), + }) + spans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeKeyPrefix(desc.StartKey), + EndKey: keys.MakeRangeKeyPrefix(desc.EndKey), + }) + spans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: desc.StartKey.AsRawKey(), + EndKey: desc.EndKey.AsRawKey(), + }, hlc.Timestamp{WallTime: 42}) + readWriter = spanset.NewReadWriterAt(readWriter, &spans, hlc.Timestamp{WallTime: 42}) + } + var rangeStart roachpb.Key + var actualKeys []storage.MVCCKey + var actualRanges []storage.MVCCRangeKey + err := IterateMVCCReplicaKeySpans(&desc, readWriter, IterateOptions{ + CombineRangesAndPoints: false, + Reverse: reverse, + }, func(iter storage.MVCCIterator, span roachpb.Span, keyType storage.IterKeyType) error { + for { + ok, err := iter.Valid() + require.NoError(t, err) + if !ok { + break + } + p, r := iter.HasPointAndRange() + if p { + if !reverse { + actualKeys = append(actualKeys, iter.Key()) + } else { + actualKeys = append([]storage.MVCCKey{iter.Key()}, actualKeys...) + } + } + if r { + rangeKeys := iter.RangeKeys().Clone() + if !rangeKeys.Bounds.Key.Equal(rangeStart) { + rangeStart = rangeKeys.Bounds.Key + if !reverse { + for _, v := range rangeKeys.Versions { + actualRanges = append(actualRanges, rangeKeys.AsRangeKey(v)) + } + } else { + for i := rangeKeys.Len() - 1; i >= 0; i-- { + actualRanges = append([]storage.MVCCRangeKey{ + rangeKeys.AsRangeKey(rangeKeys.Versions[i]), + }, actualRanges...) + } + } + } + } + if reverse { + iter.Prev() + } else { + iter.Next() } } - verifyRDReplicatedOnlyMVCCIter(t, &tc.desc, eng, pointKeys, rangeKeys) + return nil }) + require.NoError(t, err, "visitor failed") + return actualKeys, actualRanges } + goldenActualKeys, goldenActualRanges := get(t, false /* useSpanSet */, false /* reverse */) + testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) { + testutils.RunTrueAndFalse(t, "spanset", func(t *testing.T, useSpanSet bool) { + actualKeys, actualRanges := get(t, useSpanSet, reverse) + require.Equal(t, goldenActualKeys, actualKeys) + require.Equal(t, goldenActualRanges, actualRanges) + }) + }) } func checkOrdering(t *testing.T, spans []roachpb.Span) { @@ -397,7 +445,7 @@ func TestReplicaDataIteratorGlobalRangeKey(t *testing.T) { if replicatedOnly { expectedSpans = MakeReplicatedKeySpans(&desc) } else { - expectedSpans = MakeAllKeySpans(&desc) + expectedSpans = makeAllKeySpans(&desc) } var actualSpans []roachpb.Span @@ -433,10 +481,9 @@ func TestReplicaKeyRanges(t *testing.T) { StartKey: roachpb.RKeyMin, EndKey: roachpb.RKeyMax, } - checkOrdering(t, MakeAllKeySpans(&desc)) + checkOrdering(t, makeAllKeySpans(&desc)) checkOrdering(t, MakeReplicatedKeySpans(&desc)) - checkOrdering(t, MakeReplicatedKeySpansExceptLockTable(&desc)) - checkOrdering(t, MakeReplicatedKeySpansExceptRangeID(&desc)) + checkOrdering(t, makeReplicatedKeySpansExceptLockTable(&desc)) } func BenchmarkReplicaEngineDataIterator(b *testing.B) { @@ -488,7 +535,7 @@ func benchReplicaEngineDataIterator(b *testing.B, numRanges, numKeysPerRange, va for _, desc := range descs { var keyBuf roachpb.Key - keySpans := MakeAllKeySpans(&desc) + keySpans := makeAllKeySpans(&desc) for i := 0; i < numKeysPerRange; i++ { keyBuf = append(keyBuf[:0], keySpans[i%len(keySpans)].Key...) keyBuf = append(keyBuf, 0, 0, 0, 0) @@ -604,7 +651,7 @@ func TestIterateMVCCReplicaKeySpans(t *testing.T) { testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) { // Each iteration would go through all spans because we usa a single // range key covering all spans of all ranges. - expectedSpans := MakeReplicatedKeySpansExceptLockTable(&d.desc) + expectedSpans := makeReplicatedKeySpansExceptLockTable(&d.desc) if reverse { for i, j := 0, len(expectedSpans)-1; i < j; i, j = i+1, j-1 { expectedSpans[i], expectedSpans[j] = expectedSpans[j], expectedSpans[i] diff --git a/pkg/kv/kvserver/rditer/select.go b/pkg/kv/kvserver/rditer/select.go new file mode 100644 index 000000000000..87f2f310f944 --- /dev/null +++ b/pkg/kv/kvserver/rditer/select.go @@ -0,0 +1,86 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rditer + +import ( + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/roachpb" +) + +// SelectOpts configures which spans for a Replica to return from Select. +// A Replica comprises replicated (i.e. belonging to the state machine) spans +// and unreplicated spans, and depending on external circumstances one may want +// to read or erase only certain components of a Replica. +type SelectOpts struct { + // ReplicatedBySpan selects all replicated key Spans that are keyed by a user + // key. This includes user keys, range descriptors, and locks (separated + // intents). + ReplicatedBySpan roachpb.RSpan + // ReplicatedByRangeID selects all RangeID-keyed replicated keys. An example + // of a key that falls into this Span is the GCThresholdKey. + ReplicatedByRangeID bool + // UnreplicatedByRangeID selects all RangeID-keyed unreplicated keys. Examples + // of keys that fall into this Span are the HardStateKey (and generally all + // Raft state) and the RangeTombstoneKey. + UnreplicatedByRangeID bool +} + +// Select returns a slice of disjoint sorted[^1] Spans describing all or a part +// of a Replica's keyspace, depending on the supplied SelectOpts. +// +// [^1]: lexicographically (bytes.Compare), which means they are compatible with +// pebble's CockroachDB-specific sort order (storage.EngineComparer). +func Select(rangeID roachpb.RangeID, opts SelectOpts) []roachpb.Span { + var sl []roachpb.Span + + if opts.ReplicatedByRangeID { + sl = append(sl, makeRangeIDReplicatedSpan(rangeID)) + } + + if opts.UnreplicatedByRangeID { + sl = append(sl, makeRangeIDUnreplicatedSpan(rangeID)) + } + + if !opts.ReplicatedBySpan.Equal(roachpb.RSpan{}) { + // r1 "really" only starts at LocalMax. But because we use a StartKey of RKeyMin + // for r1, we actually do anchor range descriptors (and their locks and txn records) + // at RKeyMin as well. On the other hand, the "user key space" cannot start at RKeyMin + // because then it encompasses the special-cased prefix \x02... (/Local/). + // So awkwardly for key-based local keyspace we must not call KeySpan, for + // user keys we have to. + // + // See also the comment on KeySpan. + in := opts.ReplicatedBySpan + adjustedIn := in.KeySpan() + sl = append(sl, makeRangeLocalKeySpan(in)) + + // Lock table. + { + // Handle doubly-local lock table keys since range descriptor key + // is a range local key that can have a replicated lock acquired on it. + startRangeLocal, _ := keys.LockTableSingleKey(keys.MakeRangeKeyPrefix(in.Key), nil) + endRangeLocal, _ := keys.LockTableSingleKey(keys.MakeRangeKeyPrefix(in.EndKey), nil) + // Need adjusted start key to avoid overlapping with the local lock span right above. + startGlobal, _ := keys.LockTableSingleKey(adjustedIn.Key.AsRawKey(), nil) + endGlobal, _ := keys.LockTableSingleKey(adjustedIn.EndKey.AsRawKey(), nil) + sl = append(sl, roachpb.Span{ + Key: startRangeLocal, + EndKey: endRangeLocal, + }, roachpb.Span{ + Key: startGlobal, + EndKey: endGlobal, + }) + } + // Adjusted span because r1's "normal" keyspace starts only at LocalMax, not RKeyMin. + sl = append(sl, adjustedIn.AsRawSpanWithNoLocals()) + } + return sl +} diff --git a/pkg/kv/kvserver/rditer/select_test.go b/pkg/kv/kvserver/rditer/select_test.go new file mode 100644 index 000000000000..4eb08942e3a0 --- /dev/null +++ b/pkg/kv/kvserver/rditer/select_test.go @@ -0,0 +1,83 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rditer + +import ( + "bytes" + "fmt" + "sort" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" + "github.com/cockroachdb/cockroach/pkg/testutils/echotest" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/stretchr/testify/assert" +) + +func TestSelect(t *testing.T) { + defer leaktest.AfterTest(t)() + + w := echotest.NewWalker(t, datapathutils.TestDataPath(t, t.Name())) + for _, tc := range []struct { + name string + sp roachpb.RSpan + }{ + { + name: "no_span", + }, + { + name: "r1", + sp: roachpb.RSpan{ + // r1 is special - see https://github.com/cockroachdb/cockroach/issues/95055. + Key: roachpb.RKeyMin, + EndKey: roachpb.RKey("c"), + }, + }, + { + name: "r2", + sp: roachpb.RSpan{ + Key: roachpb.RKey("a"), + EndKey: roachpb.RKey("c"), + }, + }, + { + name: "r3", + sp: roachpb.RSpan{ + Key: roachpb.RKey("a"), + EndKey: roachpb.RKeyMax, + }, + }, + } { + t.Run(tc.name, w.Run(t, tc.name, func(t *testing.T) string { + var buf strings.Builder + for _, replicatedByRangeID := range []bool{false, true} { + for _, unreplicatedByRangeID := range []bool{false, true} { + opts := SelectOpts{ + ReplicatedBySpan: tc.sp, + ReplicatedByRangeID: replicatedByRangeID, + UnreplicatedByRangeID: unreplicatedByRangeID, + } + fmt.Fprintf(&buf, "Select(%+v):\n", opts) + sl := Select(roachpb.RangeID(123), opts) + assert.True(t, sort.SliceIsSorted(sl, func(i, j int) bool { + return bytes.Compare(sl[i].EndKey, sl[j].Key) < 0 + })) + for _, sp := range sl { + fmt.Fprintf(&buf, " %s\n", sp) + } + } + } + return buf.String() + })) + } +} diff --git a/pkg/kv/kvserver/rditer/stats.go b/pkg/kv/kvserver/rditer/stats.go index 51fbf9a28f36..c32e66d02a70 100644 --- a/pkg/kv/kvserver/rditer/stats.go +++ b/pkg/kv/kvserver/rditer/stats.go @@ -34,7 +34,7 @@ func ComputeStatsForRangeWithVisitors( rangeKeyVisitor func(storage.MVCCRangeKeyValue) error, ) (enginepb.MVCCStats, error) { var ms enginepb.MVCCStats - for _, keySpan := range MakeReplicatedKeySpansExceptLockTable(d) { + for _, keySpan := range makeReplicatedKeySpansExceptLockTable(d) { msDelta, err := storage.ComputeStatsWithVisitors(reader, keySpan.Key, keySpan.EndKey, nowNanos, pointKeyVisitor, rangeKeyVisitor) if err != nil { diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/all/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/all/output new file mode 100644 index 000000000000..7518c3c402af --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/all/output @@ -0,0 +1,27 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold | +| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState | +| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease | +| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 | +| /Local/RangeID/1/{u""-v""} | 0169897572667462 | | | /Local/RangeID/1/u/RangeTombstone | +| /Local/RangeID/1/{u""-v""} | 0169897572667468 | | | /Local/RangeID/1/u/RaftHardState | +| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000001 | | | /Local/RangeID/1/u/RaftLog/logIndex:1 | +| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000002 | | | /Local/RangeID/1/u/RaftLog/logIndex:2 | +| /Local/RangeID/1/{u""-v""} | 01698975726c7274 | | | /Local/RangeID/1/u/RangeLastReplicaGCTimestamp | +| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 | +| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{a"-b"} | 016b1261ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"a\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{a"-b"} | 017a6b12016b126100ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"a"/RangeDescriptor | +| /Local/Lock/Intent"{a"-b"} | 017a6b12610001 | | 030ce61c175eb445878c36dcf4062ada4c | "a" | +| {a-b} | 61 | | 0000000000000001 | "a"/0.000000001,0 | +| {a-b} | 61ffffffff | | 0000000000000001 | "a\xff\xff\xff\xff"/0.000000001,0 | +| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/replicatedOnly/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/replicatedOnly/output new file mode 100644 index 000000000000..ee483e36a852 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r1/replicatedOnly/output @@ -0,0 +1,21 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold | +| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState | +| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease | +| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 | +| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{a"-b"} | 016b1261ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"a\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{a"-b"} | 017a6b12016b126100ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"a"/RangeDescriptor | +| /Local/Lock/Intent"{a"-b"} | 017a6b12610001 | | 030ce61c175eb445878c36dcf4062ada4c | "a" | +| {a-b} | 61 | | 0000000000000001 | "a"/0.000000001,0 | +| {a-b} | 61ffffffff | | 0000000000000001 | "a\xff\xff\xff\xff"/0.000000001,0 | +| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/all/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/all/output new file mode 100644 index 000000000000..6f9d73c0aed2 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/all/output @@ -0,0 +1,27 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/2/{r""-s""} | 01698a726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/2/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/2/{r""-s""} | 01698a726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/2/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/2/{r""-s""} | 01698a726c67632d | | | /Local/RangeID/2/r/RangeGCThreshold | +| /Local/RangeID/2/{r""-s""} | 01698a727261736b | | | /Local/RangeID/2/r/RangeAppliedState | +| /Local/RangeID/2/{r""-s""} | 01698a72726c6c2d | | | /Local/RangeID/2/r/RangeLease | +| /Local/RangeID/2/{r""-s""} | 01698a723a61 | 01698a723a78 | 000000000000000109 | /Local/RangeID/2/r":{a"-x"}/0.000000001,0 | +| /Local/RangeID/2/{u""-v""} | 01698a7572667462 | | | /Local/RangeID/2/u/RangeTombstone | +| /Local/RangeID/2/{u""-v""} | 01698a7572667468 | | | /Local/RangeID/2/u/RaftHardState | +| /Local/RangeID/2/{u""-v""} | 01698a757266746c0000000000000001 | | | /Local/RangeID/2/u/RaftLog/logIndex:1 | +| /Local/RangeID/2/{u""-v""} | 01698a757266746c0000000000000002 | | | /Local/RangeID/2/u/RaftLog/logIndex:2 | +| /Local/RangeID/2/{u""-v""} | 01698a75726c7274 | | | /Local/RangeID/2/u/RangeLastReplicaGCTimestamp | +| /Local/RangeID/2/{u""-v""} | 01698a753a61 | 01698a753a78 | 000000000000000109 | /Local/RangeID/2/u":{a"-x"}/0.000000001,0 | +| /Local/Range"{b"-c"} | 016b1262000172647363 | | 0000000000000001 | /Local/Range"b"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{b"-c"} | 016b1262000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"b"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{b"-c"} | 016b126200ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"b\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{b"-c"} | 016b1262ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"b\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{b"-c"} | 017a6b12016b126200ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"b"/RangeDescriptor | +| /Local/Lock/Intent"{b"-c"} | 017a6b12620001 | | 030ce61c175eb445878c36dcf4062ada4c | "b" | +| {b-c} | 62 | | 0000000000000001 | "b"/0.000000001,0 | +| {b-c} | 62ffffffff | | 0000000000000001 | "b\xff\xff\xff\xff"/0.000000001,0 | +| {b-c} | 62 | 63 | 000000000000000109 | {b-c}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/replicatedOnly/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/replicatedOnly/output new file mode 100644 index 000000000000..79b044b8313f --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r2/replicatedOnly/output @@ -0,0 +1,21 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/2/{r""-s""} | 01698a726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/2/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/2/{r""-s""} | 01698a726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/2/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/2/{r""-s""} | 01698a726c67632d | | | /Local/RangeID/2/r/RangeGCThreshold | +| /Local/RangeID/2/{r""-s""} | 01698a727261736b | | | /Local/RangeID/2/r/RangeAppliedState | +| /Local/RangeID/2/{r""-s""} | 01698a72726c6c2d | | | /Local/RangeID/2/r/RangeLease | +| /Local/RangeID/2/{r""-s""} | 01698a723a61 | 01698a723a78 | 000000000000000109 | /Local/RangeID/2/r":{a"-x"}/0.000000001,0 | +| /Local/Range"{b"-c"} | 016b1262000172647363 | | 0000000000000001 | /Local/Range"b"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{b"-c"} | 016b1262000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"b"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{b"-c"} | 016b126200ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"b\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{b"-c"} | 016b1262ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"b\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{b"-c"} | 017a6b12016b126200ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"b"/RangeDescriptor | +| /Local/Lock/Intent"{b"-c"} | 017a6b12620001 | | 030ce61c175eb445878c36dcf4062ada4c | "b" | +| {b-c} | 62 | | 0000000000000001 | "b"/0.000000001,0 | +| {b-c} | 62ffffffff | | 0000000000000001 | "b\xff\xff\xff\xff"/0.000000001,0 | +| {b-c} | 62 | 63 | 000000000000000109 | {b-c}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/all/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/all/output new file mode 100644 index 000000000000..4899536a96d1 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/all/output @@ -0,0 +1,27 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/3/{r""-s""} | 01698b726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/3/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/3/{r""-s""} | 01698b726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/3/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/3/{r""-s""} | 01698b726c67632d | | | /Local/RangeID/3/r/RangeGCThreshold | +| /Local/RangeID/3/{r""-s""} | 01698b727261736b | | | /Local/RangeID/3/r/RangeAppliedState | +| /Local/RangeID/3/{r""-s""} | 01698b72726c6c2d | | | /Local/RangeID/3/r/RangeLease | +| /Local/RangeID/3/{r""-s""} | 01698b723a61 | 01698b723a78 | 000000000000000109 | /Local/RangeID/3/r":{a"-x"}/0.000000001,0 | +| /Local/RangeID/3/{u""-v""} | 01698b7572667462 | | | /Local/RangeID/3/u/RangeTombstone | +| /Local/RangeID/3/{u""-v""} | 01698b7572667468 | | | /Local/RangeID/3/u/RaftHardState | +| /Local/RangeID/3/{u""-v""} | 01698b757266746c0000000000000001 | | | /Local/RangeID/3/u/RaftLog/logIndex:1 | +| /Local/RangeID/3/{u""-v""} | 01698b757266746c0000000000000002 | | | /Local/RangeID/3/u/RaftLog/logIndex:2 | +| /Local/RangeID/3/{u""-v""} | 01698b75726c7274 | | | /Local/RangeID/3/u/RangeLastReplicaGCTimestamp | +| /Local/RangeID/3/{u""-v""} | 01698b753a61 | 01698b753a78 | 000000000000000109 | /Local/RangeID/3/u":{a"-x"}/0.000000001,0 | +| /Local/Range"{c"-d"} | 016b1263000172647363 | | 0000000000000001 | /Local/Range"c"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{c"-d"} | 016b1263000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"c"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{c"-d"} | 016b126300ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"c\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{c"-d"} | 016b1263ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"c\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{c"-d"} | 017a6b12016b126300ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"c"/RangeDescriptor | +| /Local/Lock/Intent"{c"-d"} | 017a6b12630001 | | 030ce61c175eb445878c36dcf4062ada4c | "c" | +| {c-d} | 63 | | 0000000000000001 | "c"/0.000000001,0 | +| {c-d} | 63ffffffff | | 0000000000000001 | "c\xff\xff\xff\xff"/0.000000001,0 | +| {c-d} | 63 | 64 | 000000000000000109 | {c-d}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/replicatedOnly/output b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/replicatedOnly/output new file mode 100644 index 000000000000..b2ac4cfd3b59 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestReplicaDataIterator/r3/replicatedOnly/output @@ -0,0 +1,21 @@ +echo +---- ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ +| /Local/RangeID/3/{r""-s""} | 01698b726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/3/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/RangeID/3/{r""-s""} | 01698b726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/3/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/RangeID/3/{r""-s""} | 01698b726c67632d | | | /Local/RangeID/3/r/RangeGCThreshold | +| /Local/RangeID/3/{r""-s""} | 01698b727261736b | | | /Local/RangeID/3/r/RangeAppliedState | +| /Local/RangeID/3/{r""-s""} | 01698b72726c6c2d | | | /Local/RangeID/3/r/RangeLease | +| /Local/RangeID/3/{r""-s""} | 01698b723a61 | 01698b723a78 | 000000000000000109 | /Local/RangeID/3/r":{a"-x"}/0.000000001,0 | +| /Local/Range"{c"-d"} | 016b1263000172647363 | | 0000000000000001 | /Local/Range"c"/RangeDescriptor/0.000000001,0 | +| /Local/Range"{c"-d"} | 016b1263000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"c"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" | +| /Local/Range"{c"-d"} | 016b126300ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"c\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" | +| /Local/Range"{c"-d"} | 016b1263ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"c\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" | +| /Local/Lock/Intent/Local/Range"{c"-d"} | 017a6b12016b126300ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"c"/RangeDescriptor | +| /Local/Lock/Intent"{c"-d"} | 017a6b12630001 | | 030ce61c175eb445878c36dcf4062ada4c | "c" | +| {c-d} | 63 | | 0000000000000001 | "c"/0.000000001,0 | +| {c-d} | 63ffffffff | | 0000000000000001 | "c\xff\xff\xff\xff"/0.000000001,0 | +| {c-d} | 63 | 64 | 000000000000000109 | {c-d}/0.000000001,0 | ++----------------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+ diff --git a/pkg/kv/kvserver/rditer/testdata/TestSelect/no_span b/pkg/kv/kvserver/rditer/testdata/TestSelect/no_span new file mode 100644 index 000000000000..f8eacf173f16 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestSelect/no_span @@ -0,0 +1,10 @@ +echo +---- +Select({ReplicatedBySpan:/Min ReplicatedByRangeID:false UnreplicatedByRangeID:false}): +Select({ReplicatedBySpan:/Min ReplicatedByRangeID:false UnreplicatedByRangeID:true}): + /Local/RangeID/123/{u""-v""} +Select({ReplicatedBySpan:/Min ReplicatedByRangeID:true UnreplicatedByRangeID:false}): + /Local/RangeID/123/{r""-s""} +Select({ReplicatedBySpan:/Min ReplicatedByRangeID:true UnreplicatedByRangeID:true}): + /Local/RangeID/123/{r""-s""} + /Local/RangeID/123/{u""-v""} diff --git a/pkg/kv/kvserver/rditer/testdata/TestSelect/r1 b/pkg/kv/kvserver/rditer/testdata/TestSelect/r1 new file mode 100644 index 000000000000..457f59c5f533 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestSelect/r1 @@ -0,0 +1,26 @@ +echo +---- +Select({ReplicatedBySpan:{/Min-c} ReplicatedByRangeID:false UnreplicatedByRangeID:false}): + /Local/Range{/Min-"c"} + /Local/Lock/Intent/Local/Range{/Min-"c"} + /Local/Lock/Intent{/Meta1/""-"c"} + {/Meta1/-c} +Select({ReplicatedBySpan:{/Min-c} ReplicatedByRangeID:false UnreplicatedByRangeID:true}): + /Local/RangeID/123/{u""-v""} + /Local/Range{/Min-"c"} + /Local/Lock/Intent/Local/Range{/Min-"c"} + /Local/Lock/Intent{/Meta1/""-"c"} + {/Meta1/-c} +Select({ReplicatedBySpan:{/Min-c} ReplicatedByRangeID:true UnreplicatedByRangeID:false}): + /Local/RangeID/123/{r""-s""} + /Local/Range{/Min-"c"} + /Local/Lock/Intent/Local/Range{/Min-"c"} + /Local/Lock/Intent{/Meta1/""-"c"} + {/Meta1/-c} +Select({ReplicatedBySpan:{/Min-c} ReplicatedByRangeID:true UnreplicatedByRangeID:true}): + /Local/RangeID/123/{r""-s""} + /Local/RangeID/123/{u""-v""} + /Local/Range{/Min-"c"} + /Local/Lock/Intent/Local/Range{/Min-"c"} + /Local/Lock/Intent{/Meta1/""-"c"} + {/Meta1/-c} diff --git a/pkg/kv/kvserver/rditer/testdata/TestSelect/r2 b/pkg/kv/kvserver/rditer/testdata/TestSelect/r2 new file mode 100644 index 000000000000..35f755e3c75d --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestSelect/r2 @@ -0,0 +1,26 @@ +echo +---- +Select({ReplicatedBySpan:{a-c} ReplicatedByRangeID:false UnreplicatedByRangeID:false}): + /Local/Range"{a"-c"} + /Local/Lock/Intent/Local/Range"{a"-c"} + /Local/Lock/Intent"{a"-c"} + {a-c} +Select({ReplicatedBySpan:{a-c} ReplicatedByRangeID:false UnreplicatedByRangeID:true}): + /Local/RangeID/123/{u""-v""} + /Local/Range"{a"-c"} + /Local/Lock/Intent/Local/Range"{a"-c"} + /Local/Lock/Intent"{a"-c"} + {a-c} +Select({ReplicatedBySpan:{a-c} ReplicatedByRangeID:true UnreplicatedByRangeID:false}): + /Local/RangeID/123/{r""-s""} + /Local/Range"{a"-c"} + /Local/Lock/Intent/Local/Range"{a"-c"} + /Local/Lock/Intent"{a"-c"} + {a-c} +Select({ReplicatedBySpan:{a-c} ReplicatedByRangeID:true UnreplicatedByRangeID:true}): + /Local/RangeID/123/{r""-s""} + /Local/RangeID/123/{u""-v""} + /Local/Range"{a"-c"} + /Local/Lock/Intent/Local/Range"{a"-c"} + /Local/Lock/Intent"{a"-c"} + {a-c} diff --git a/pkg/kv/kvserver/rditer/testdata/TestSelect/r3 b/pkg/kv/kvserver/rditer/testdata/TestSelect/r3 new file mode 100644 index 000000000000..4e3f40dca0c6 --- /dev/null +++ b/pkg/kv/kvserver/rditer/testdata/TestSelect/r3 @@ -0,0 +1,26 @@ +echo +---- +Select({ReplicatedBySpan:{a-/Max} ReplicatedByRangeID:false UnreplicatedByRangeID:false}): + /Local/Range{"a"-/Max} + /Local/Lock/Intent/Local/Range{"a"-/Max} + /Local/Lock/Intent{"a"-/Max} + {a-/Max} +Select({ReplicatedBySpan:{a-/Max} ReplicatedByRangeID:false UnreplicatedByRangeID:true}): + /Local/RangeID/123/{u""-v""} + /Local/Range{"a"-/Max} + /Local/Lock/Intent/Local/Range{"a"-/Max} + /Local/Lock/Intent{"a"-/Max} + {a-/Max} +Select({ReplicatedBySpan:{a-/Max} ReplicatedByRangeID:true UnreplicatedByRangeID:false}): + /Local/RangeID/123/{r""-s""} + /Local/Range{"a"-/Max} + /Local/Lock/Intent/Local/Range{"a"-/Max} + /Local/Lock/Intent{"a"-/Max} + {a-/Max} +Select({ReplicatedBySpan:{a-/Max} ReplicatedByRangeID:true UnreplicatedByRangeID:true}): + /Local/RangeID/123/{r""-s""} + /Local/RangeID/123/{u""-v""} + /Local/Range{"a"-/Max} + /Local/Lock/Intent/Local/Range{"a"-/Max} + /Local/Lock/Intent{"a"-/Max} + {a-/Max} diff --git a/pkg/kv/kvserver/replica_app_batch.go b/pkg/kv/kvserver/replica_app_batch.go index 45632c42e202..8a42cda8f2ea 100644 --- a/pkg/kv/kvserver/replica_app_batch.go +++ b/pkg/kv/kvserver/replica_app_batch.go @@ -334,10 +334,11 @@ func (b *replicaAppBatch) runPostAddTriggersReplicaOnly( // required for correctness, since the merge protocol should guarantee that // no new replicas of the RHS can ever be created, but it doesn't hurt to // be careful. - const clearRangeIDLocalOnly = true - const mustClearRange = false if err := rhsRepl.preDestroyRaftMuLocked( - ctx, b.batch, b.batch, mergedTombstoneReplicaID, clearRangeIDLocalOnly, mustClearRange, + ctx, b.batch, b.batch, mergedTombstoneReplicaID, clearRangeDataOptions{ + ClearReplicatedByRangeID: true, + ClearUnreplicatedByRangeID: true, + }, ); err != nil { return errors.Wrapf(err, "unable to destroy replica before merge") } @@ -466,21 +467,25 @@ func (b *replicaAppBatch) runPostAddTriggersReplicaOnly( b.r.mu.destroyStatus.Set( roachpb.NewRangeNotFoundError(b.r.RangeID, b.r.store.StoreID()), destroyReasonRemoved) + span := b.r.descRLocked().RSpan() b.r.mu.Unlock() b.r.readOnlyCmdMu.Unlock() b.changeRemovesReplica = true - // Delete all of the local data. We're going to delete the hard state too. - // In order for this to be safe we need code above this to promise that we're - // never going to write hard state in response to a message for a later - // replica (with a different replica ID) to this range state. + // Delete all of the Replica's data. We're going to delete the hard state too. + // We've set the replica's in-mem status to reflect the pending destruction + // above, and preDestroyRaftMuLocked will also add a range tombstone to the + // batch, so that when we commit it, the removal is finalized. if err := b.r.preDestroyRaftMuLocked( ctx, b.batch, b.batch, change.NextReplicaID(), - false, /* clearRangeIDLocalOnly */ - false, /* mustUseClearRange */ + clearRangeDataOptions{ + ClearReplicatedBySpan: span, + ClearReplicatedByRangeID: true, + ClearUnreplicatedByRangeID: true, + }, ); err != nil { return errors.Wrapf(err, "unable to destroy replica before removal") } diff --git a/pkg/kv/kvserver/replica_destroy.go b/pkg/kv/kvserver/replica_destroy.go index a4c65b9a4780..1d4975717308 100644 --- a/pkg/kv/kvserver/replica_destroy.go +++ b/pkg/kv/kvserver/replica_destroy.go @@ -76,8 +76,7 @@ func (r *Replica) preDestroyRaftMuLocked( reader storage.Reader, writer storage.Writer, nextReplicaID roachpb.ReplicaID, - clearRangeIDLocalOnly bool, - mustUseClearRange bool, + opts clearRangeDataOptions, ) error { r.mu.RLock() desc := r.descRLocked() @@ -89,8 +88,7 @@ func (r *Replica) preDestroyRaftMuLocked( if !removed { log.Fatalf(ctx, "replica not marked as destroyed before call to preDestroyRaftMuLocked: %v", r) } - - err := clearRangeData(desc, reader, writer, clearRangeIDLocalOnly, mustUseClearRange) + err := clearRangeData(desc.RangeID, reader, writer, opts) if err != nil { return err } @@ -147,14 +145,25 @@ func (r *Replica) destroyRaftMuLocked(ctx context.Context, nextReplicaID roachpb ms := r.GetMVCCStats() batch := r.Engine().NewUnindexedBatch(true /* writeOnly */) defer batch.Close() - clearRangeIDLocalOnly := !r.IsInitialized() + desc := r.Desc() + inited := desc.IsInitialized() + + opts := clearRangeDataOptions{ + ClearReplicatedBySpan: desc.RSpan(), // zero if !inited + // TODO(tbg): if it's uninitialized, we might as well clear + // the replicated state because there isn't any. This seems + // like it would be simpler, but needs a code audit to ensure + // callers don't call this in in-between states where the above + // assumption doesn't hold. + ClearReplicatedByRangeID: inited, + ClearUnreplicatedByRangeID: true, + } if err := r.preDestroyRaftMuLocked( ctx, r.Engine(), batch, nextReplicaID, - clearRangeIDLocalOnly, - false, /* mustUseClearRange */ + opts, ); err != nil { return err } diff --git a/pkg/kv/kvserver/replica_raftstorage.go b/pkg/kv/kvserver/replica_raftstorage.go index 00d53520413f..ee2c66e47a4c 100644 --- a/pkg/kv/kvserver/replica_raftstorage.go +++ b/pkg/kv/kvserver/replica_raftstorage.go @@ -421,31 +421,43 @@ func (r *Replica) updateRangeInfo(ctx context.Context, desc *roachpb.RangeDescri return nil } -// clearRangeData clears the data associated with a range descriptor. If -// rangeIDLocalOnly is true, then only the range-id local keys are deleted. -// Otherwise, the range-id local keys, range local keys, and user keys are all -// deleted. +type clearRangeDataOptions struct { + // ClearReplicatedByRangeID indicates that replicated RangeID-based keys + // (abort span, etc) should be removed. + ClearReplicatedByRangeID bool + // ClearUnreplicatedByRangeID indicates that unreplicated RangeID-based keys + // (logstore state incl. HardState, etc) should be removed. + ClearUnreplicatedByRangeID bool + // ClearReplicatedBySpan causes the state machine data (i.e. the replicated state + // for the given RSpan) that is key-addressable (i.e. range descriptor, user keys, + // locks) to be removed. No data is removed if this is the zero span. + ClearReplicatedBySpan roachpb.RSpan + + // If MustUseClearRange is true, a Pebble range tombstone will always be used + // to clear the key spans (unless empty). This is typically used when we need + // to write additional keys to an SST after this clear, e.g. a replica + // tombstone, since keys must be written in order. When this is false, a + // heuristic will be used instead. + MustUseClearRange bool +} + +// clearRangeData clears the data associated with a range descriptor selected +// by the provided clearRangeDataOptions. // -// If mustUseClearRange is true, a Pebble range tombstone will always be used to -// clear the key spans (unless empty). This is typically used when we need to -// write additional keys to an SST after this clear, e.g. a replica tombstone, -// since keys must be written in order. +// TODO(tbg): could rename this to clearReplicaData. The use of "Range" in both the +// "CRDB Range" and "storage.ClearRange" context in the setting of this method could +// be confusing. func clearRangeData( - desc *roachpb.RangeDescriptor, - reader storage.Reader, - writer storage.Writer, - rangeIDLocalOnly bool, - mustUseClearRange bool, + rangeID roachpb.RangeID, reader storage.Reader, writer storage.Writer, opts clearRangeDataOptions, ) error { - var keySpans []roachpb.Span - if rangeIDLocalOnly { - keySpans = []roachpb.Span{rditer.MakeRangeIDLocalKeySpan(desc.RangeID, false)} - } else { - keySpans = rditer.MakeAllKeySpans(desc) - } + keySpans := rditer.Select(rangeID, rditer.SelectOpts{ + ReplicatedBySpan: opts.ClearReplicatedBySpan, + ReplicatedByRangeID: opts.ClearReplicatedByRangeID, + UnreplicatedByRangeID: opts.ClearUnreplicatedByRangeID, + }) pointKeyThreshold, rangeKeyThreshold := clearRangeThresholdPointKeys, clearRangeThresholdRangeKeys - if mustUseClearRange { + if opts.MustUseClearRange { pointKeyThreshold, rangeKeyThreshold = 1, 1 } @@ -792,7 +804,11 @@ func (r *Replica) clearSubsumedReplicaDiskData( ) error { // NB: we don't clear RangeID local key spans here. That happens // via the call to preDestroyRaftMuLocked. - getKeySpans := rditer.MakeReplicatedKeySpansExceptRangeID + getKeySpans := func(d *roachpb.RangeDescriptor) []roachpb.Span { + return rditer.Select(d.RangeID, rditer.SelectOpts{ + ReplicatedBySpan: d.RSpan(), + }) + } keySpans := getKeySpans(desc) totalKeySpans := append([]roachpb.Span(nil), keySpans...) for _, sr := range subsumedRepls { @@ -816,13 +832,17 @@ func (r *Replica) clearSubsumedReplicaDiskData( // NOTE: We set mustClearRange to true because we are setting // RangeTombstoneKey. Since Clears and Puts need to be done in increasing // order of keys, it is not safe to use ClearRangeIter. + opts := clearRangeDataOptions{ + ClearReplicatedByRangeID: true, + ClearUnreplicatedByRangeID: true, + MustUseClearRange: true, + } if err := sr.preDestroyRaftMuLocked( ctx, r.store.Engine(), &subsumedReplSST, subsumedNextReplicaID, - true, /* clearRangeIDLocalOnly */ - true, /* mustClearRange */ + opts, ); err != nil { subsumedReplSST.Close() return err diff --git a/pkg/kv/kvserver/replicate_queue_test.go b/pkg/kv/kvserver/replicate_queue_test.go index eef4cbe31ff2..ff3b929afab8 100644 --- a/pkg/kv/kvserver/replicate_queue_test.go +++ b/pkg/kv/kvserver/replicate_queue_test.go @@ -17,6 +17,8 @@ import ( "fmt" "math" "math/rand" + "os" + "path/filepath" "regexp" "strconv" "strings" @@ -189,24 +191,48 @@ func TestReplicateQueueRebalanceMultiStore(t *testing.T) { allocatorimpl.LeaseRebalanceThreshold = 0.01 allocatorimpl.LeaseRebalanceThresholdMin = 0.0 + const useDisk = false // for debugging purposes + spec := func(node int, store int) base.StoreSpec { + return base.DefaultTestStoreSpec + } + if useDisk { + td, err := os.MkdirTemp("", "test") + require.NoError(t, err) + t.Logf("store dirs in %s", td) + spec = func(node int, store int) base.StoreSpec { + return base.StoreSpec{ + Path: filepath.Join(td, fmt.Sprintf("n%ds%d", node, store)), + Size: base.SizeSpec{}, + } + } + t.Cleanup(func() { + if t.Failed() { + return + } + _ = os.RemoveAll(td) + }) + } for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { // Set up a test cluster with multiple stores per node if needed. - serverArgs := base.TestServerArgs{ - ScanMinIdleTime: time.Millisecond, - ScanMaxIdleTime: time.Millisecond, + args := base.TestClusterArgs{ + ReplicationMode: base.ReplicationAuto, + ServerArgsPerNode: map[int]base.TestServerArgs{}, } - if testCase.storesPerNode > 1 { - serverArgs.StoreSpecs = make([]base.StoreSpec, testCase.storesPerNode) - for i := range serverArgs.StoreSpecs { - serverArgs.StoreSpecs[i] = base.DefaultTestStoreSpec + for i := 0; i < testCase.nodes; i++ { + perNode := base.TestServerArgs{ + ScanMinIdleTime: time.Millisecond, + ScanMaxIdleTime: time.Millisecond, + } + perNode.StoreSpecs = make([]base.StoreSpec, testCase.storesPerNode) + for idx := range perNode.StoreSpecs { + perNode.StoreSpecs[idx] = spec(i+1, idx+1) } + args.ServerArgsPerNode[i] = perNode } tc := testcluster.StartTestCluster(t, testCase.nodes, - base.TestClusterArgs{ - ReplicationMode: base.ReplicationAuto, - ServerArgs: serverArgs, - }) + args) defer tc.Stopper().Stop(context.Background()) ctx := context.Background() for _, server := range tc.Servers { diff --git a/pkg/kv/kvserver/store_split.go b/pkg/kv/kvserver/store_split.go index 5cad93cef7a9..c61824bee315 100644 --- a/pkg/kv/kvserver/store_split.go +++ b/pkg/kv/kvserver/store_split.go @@ -66,29 +66,18 @@ func splitPreApply( // If rightRepl is not nil, we are *not* holding raftMu. // // To apply the split, we need to "throw away" the data that would belong to - // the RHS, i.e. we clear the user data the RHS would have inherited from the - // LHS due to the split and additionally clear all of the range ID local state - // that the split trigger writes into the RHS. - // - // We know we've never processed a snapshot for the right range because the - // LHS prevents any incoming snapshots until the split has executed (i.e. now). - // It is important to preserve the HardState because we might however have - // already voted at a higher term. In general this shouldn't happen because - // we add learners and then promote them only after we snapshot but we're + // the RHS, i.e. we clear the user data the RHS would have inherited from + // the LHS due to the split and additionally clear all of the range ID local + // state that the split trigger writes into the RHS. At the time of writing, + // unfortunately that means that we'll also delete any data that might + // already be present in the RHS: the HardState and RaftReplicaID. It is + // important to preserve the HardState because we might however have already + // voted at a higher term. In general this shouldn't happen because we add + // learners and then promote them only after they apply a snapshot but we're // going to be extra careful in case future versions of cockroach somehow - // promote replicas without ensuring that a snapshot has been received. - // - // Rather than specifically deleting around the data we want to preserve - // we read the HardState to preserve it, clear everything and write back - // the HardState and tombstone. Note that we only do this if rightRepl - // exists; if it doesn't, there's no Raft state to massage (when rightRepl - // was removed, a tombstone was written instead). - // - // TODO(tbg): it would be cleaner to teach clearRangeData to only remove - // the replicated state if rightRepl != nil, as opposed to writing back - // internal raft state. As is, it's going to break with any new piece of - // local state that we add, and it introduces locking between two Replicas - // that don't ever need to interact. + // promote replicas without ensuring that a snapshot has been received. So + // we write it back (and the RaftReplicaID too, since it's an invariant that + // it's always present). var hs raftpb.HardState if rightRepl != nil { rightRepl.raftMu.Lock() @@ -105,9 +94,30 @@ func splitPreApply( log.Fatalf(ctx, "failed to load hard state for removed rhs: %v", err) } } - const rangeIDLocalOnly = false - const mustUseClearRange = false - if err := clearRangeData(&split.RightDesc, readWriter, readWriter, rangeIDLocalOnly, mustUseClearRange); err != nil { + if err := clearRangeData(split.RightDesc.RangeID, readWriter, readWriter, clearRangeDataOptions{ + // We know there isn't anything in these two replicated spans below in the + // right-hand side (before the current batch), so setting these options + // will in effect only clear the writes to the RHS replicated state we have + // staged in this batch, which is what we're after. + ClearReplicatedBySpan: split.RightDesc.RSpan(), + ClearReplicatedByRangeID: true, + // See the HardState write-back dance above and below. + // + // TODO(tbg): we don't actually want to touch the raft state of the right + // hand side replica since it's absent or a more recent replica than the + // split. Now that we have a boolean targeting the unreplicated + // RangeID-based keyspace, we can set this to false and remove the + // HardState+ReplicaID write-back. (The WriteBatch does not contain + // any writes to the unreplicated RangeID keyspace for the RHS, see + // splitTriggerHelper[^1]). + // + // [^1]: https://github.com/cockroachdb/cockroach/blob/f263a765d750e41f2701da0a923a6e92d09159fa/pkg/kv/kvserver/batcheval/cmd_end_transaction.go#L1109-L1149 + // + // See also: + // + // https://github.com/cockroachdb/cockroach/issues/94933 + ClearUnreplicatedByRangeID: true, + }); err != nil { log.Fatalf(ctx, "failed to clear range data for removed rhs: %v", err) } if rightRepl != nil { diff --git a/pkg/roachpb/data.go b/pkg/roachpb/data.go index ec8cd285a972..72ae6bf46a68 100644 --- a/pkg/roachpb/data.go +++ b/pkg/roachpb/data.go @@ -2357,6 +2357,24 @@ type RSpan struct { Key, EndKey RKey } +// KeySpan returns the Span with the StartKey forwarded to LocalMax, if necessary. +// +// See: https://github.com/cockroachdb/cockroach/issues/95055 +func (rs RSpan) KeySpan() RSpan { + start := rs.Key + if start.Equal(RKeyMin) { + // The first range in the keyspace is declared to start at KeyMin (the + // lowest possible key). That is a lie, however, since the local key space + // ([LocalMin,LocalMax)) doesn't belong to this range; it doesn't belong to + // any range in particular. + start = RKey(LocalMax) + } + return RSpan{ + Key: start, + EndKey: rs.EndKey, + } +} + // Equal compares for equality. func (rs RSpan) Equal(o RSpan) bool { return rs.Key.Equal(o.Key) && rs.EndKey.Equal(o.EndKey) diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go index fa7e2b22d2e5..60514987fe21 100644 --- a/pkg/roachpb/metadata.go +++ b/pkg/roachpb/metadata.go @@ -162,23 +162,11 @@ func (r *RangeDescriptor) RSpan() RSpan { } // KeySpan returns the keys covered by this range. Local keys are not included. +// This is identical to RSpan(), but for r1 the StartKey is forwarded to LocalMax. // -// TODO(andrei): Consider if this logic should be lifted to -// RangeDescriptor.RSpan(). Or better yet, see if we can changes things such -// that the first range starts at LocalMax instead at starting at an empty key. +// See: https://github.com/cockroachdb/cockroach/issues/95055 func (r *RangeDescriptor) KeySpan() RSpan { - start := r.StartKey - if r.StartKey.Equal(RKeyMin) { - // The first range in the keyspace is declared to start at KeyMin (the - // lowest possible key). That is a lie, however, since the local key space - // ([LocalMin,LocalMax)) doesn't belong to this range; it doesn't belong to - // any range in particular. - start = RKey(LocalMax) - } - return RSpan{ - Key: start, - EndKey: r.EndKey, - } + return r.RSpan().KeySpan() } // ContainsKey returns whether this RangeDescriptor contains the specified key.