From cddc665c86b84d0caa0e8620616bd3852a1e2018 Mon Sep 17 00:00:00 2001
From: Andrew Werner
Date: Thu, 30 Jan 2020 11:24:22 -0500
Subject: [PATCH] engine: alter the meaning of targetSize in ExportToSst

In #44440 we added a `targetSize` parameter to enable pagination of
export requests. In that PR we defined `targetSize` such that the
export returns just before adding the key that would cause
`targetSize` to be exceeded.

This definition is unfortunate when thinking about a total size limit
for pagination in the DistSender (which we'll add when #44341 comes
in). Imagine a case where we set a total byte limit of 1MB and a file
byte limit of 1MB. That setting should lead to at most a single file
being emitted (assuming one range holds enough data). Under the
previous definition we'd create a file which is just below 1MB, and
the DistSender would then need to send another request containing a
tiny amount of data.

This brings the behavior in line with the semantics introduced in
#44341 for ScanRequests and is easier to reason about.

Release note: None
---
 c-deps/libroach/db.cc              |  6 +++---
 c-deps/libroach/include/libroach.h | 10 +++-------
 pkg/ccl/storageccl/export_test.go  | 19 ++++++++++++++++++-
 pkg/storage/engine/engine.go       | 12 ++++--------
 pkg/storage/engine/pebble.go       |  6 +++---
 5 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/c-deps/libroach/db.cc b/c-deps/libroach/db.cc
index 5c83ce9904b3..28ebc9597c30 100644
--- a/c-deps/libroach/db.cc
+++ b/c-deps/libroach/db.cc
@@ -1136,9 +1136,8 @@ DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64
     // Check to see if this is the first version of key and adding it would
     // put us over the limit (we might already be over the limit).
     const int64_t cur_size = bulkop_summary.data_size();
-    const int64_t new_size = cur_size + decoded_key.size() + iter.value().size();
-    const bool is_over_target = cur_size > 0 && new_size > target_size;
-    if (paginated && is_new_key && is_over_target) {
+    const bool reached_target_size = cur_size > 0 && cur_size >= target_size;
+    if (paginated && is_new_key && reached_target_size) {
       resume_key.reserve(decoded_key.size());
       resume_key.assign(decoded_key.data(), decoded_key.size());
       break;
@@ -1154,6 +1153,7 @@ DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64
     if (!row_counter.Count((iter.key()), &bulkop_summary)) {
       return ToDBString("Error in row counter");
     }
+    const int64_t new_size = cur_size + decoded_key.size() + iter.value().size();
     bulkop_summary.set_data_size(new_size);
   }
   *summary = ToDBString(bulkop_summary.SerializeAsString());
diff --git a/c-deps/libroach/include/libroach.h b/c-deps/libroach/include/libroach.h
index ad916cac3893..2b7d8fb650ea 100644
--- a/c-deps/libroach/include/libroach.h
+++ b/c-deps/libroach/include/libroach.h
@@ -556,13 +556,9 @@ DBStatus DBUnlockFile(DBFileLock lock);
 // DBExportToSst exports changes over the keyrange and time interval between the
 // start and end DBKeys to an SSTable using an IncrementalIterator.
 // If target_size is positive, it indicates that the export should produce SSTs
-// which are roughly target size. Specifically, it will produce SSTs which contain
-// all relevant versions of a key and will not add the first version of a new
-// key if it would lead to the SST exceeding the target_size. If export_all_revisions
-// is false, the returned SST will be smaller than target_size so long as the first
-// kv pair is smaller than target_size. If export_all_revisions is true then
-// target_size may be exceeded. If the SST construction stops due to the target_size,
-// then resume will be set to the value of the resume key.
+// which are roughly the target size. Specifically, it will return an SST such that
+// the last key is responsible for meeting or exceeding the target_size. If the
+// resume_key is non-NULL then the returned SST's data size will be at least target_size.
 DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64_t target_size,
                        DBIterOptions iter_opts, DBEngine* engine, DBString* data, DBString* write_intent,
                        DBString* summary, DBString* resume);
diff --git a/pkg/ccl/storageccl/export_test.go b/pkg/ccl/storageccl/export_test.go
index 56728d5fecb7..99f975335df7 100644
--- a/pkg/ccl/storageccl/export_test.go
+++ b/pkg/ccl/storageccl/export_test.go
@@ -333,9 +333,26 @@ func assertEqualKVs(
 	var kvs []engine.MVCCKeyValue
 	for start := startKey; start != nil; {
 		var sst []byte
-		sst, _, start, err = e.ExportToSst(start, endKey, startTime, endTime, exportAllRevisions, targetSize, io)
+		var summary roachpb.BulkOpSummary
+		sst, summary, start, err = e.ExportToSst(start, endKey, startTime, endTime, exportAllRevisions, targetSize, io)
 		require.NoError(t, err)
 		loaded := loadSST(t, sst, startKey, endKey)
+		// Ensure that the pagination worked properly.
+		if start != nil {
+			dataSize := uint64(summary.DataSize)
+			require.Truef(t, targetSize <= dataSize, "%d > %d",
+				targetSize, summary.DataSize)
+			// Now we want to ensure that if we remove the bytes due to the last
+			// key, we are below the target size.
+			firstKVofLastKey := sort.Search(len(loaded), func(i int) bool {
+				return loaded[i].Key.Key.Equal(loaded[len(loaded)-1].Key.Key)
+			})
+			dataSizeWithoutLastKey := dataSize
+			for _, kv := range loaded[firstKVofLastKey:] {
+				dataSizeWithoutLastKey -= uint64(kv.Key.Len() + len(kv.Value))
+			}
+			require.Truef(t, targetSize > dataSizeWithoutLastKey, "%d <= %d", targetSize, dataSizeWithoutLastKey)
+		}
 		kvs = append(kvs, loaded...)
 	}
 
diff --git a/pkg/storage/engine/engine.go b/pkg/storage/engine/engine.go
index d2163fb1c1c8..207a6035557b 100644
--- a/pkg/storage/engine/engine.go
+++ b/pkg/storage/engine/engine.go
@@ -201,14 +201,10 @@ type Reader interface {
 	// requested or if the start.Timestamp is non-zero. Returns the bytes of an
 	// SSTable containing the exported keys, the size of exported data, or an error.
 	// If targetSize is positive, it indicates that the export should produce SSTs
-	// which are roughly target size. Specifically, it will produce SSTs which contain
-	// all relevant versions of a key and will not add the first version of a new
-	// key if it would lead to the SST exceeding the targetSize. If exportAllRevisions
-	// is false, the returned SST will be smaller than target_size so long as the first
-	// kv pair is smaller than targetSize. If exportAllRevisions is true then
-	// targetSize may be exceeded by as much as the size of all of the versions of
-	// the last key. If the SST construction stops due to the targetSize,
-	// then a non-nil resumeKey will be returned.
+	// which are roughly the target size. Specifically, it will return an SST such
+	// that the last key is responsible for meeting or exceeding the targetSize. If
+	// the resumeKey is non-nil then the data size of the returned SST will be
+	// greater than or equal to the targetSize.
 	ExportToSst(
 		startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
 		exportAllRevisions bool, targetSize uint64, io IterOptions,
diff --git a/pkg/storage/engine/pebble.go b/pkg/storage/engine/pebble.go
index d573cd462af1..2786fc391af9 100644
--- a/pkg/storage/engine/pebble.go
+++ b/pkg/storage/engine/pebble.go
@@ -1163,9 +1163,8 @@ func pebbleExportToSst(
 			return nil, roachpb.BulkOpSummary{}, nil, errors.Wrapf(err, "decoding %s", unsafeKey)
 		}
 		curSize := rows.BulkOpSummary.DataSize
-		newSize := curSize + int64(len(unsafeKey.Key)+len(unsafeValue))
-		isOverTarget := paginated && curSize > 0 && uint64(newSize) > targetSize
-		if isNewKey && isOverTarget {
+		reachedTargetSize := curSize > 0 && uint64(curSize) >= targetSize
+		if paginated && isNewKey && reachedTargetSize {
 			// Allocate the right size for resumeKey rather than using curKey.
 			resumeKey = append(make(roachpb.Key, 0, len(unsafeKey.Key)), unsafeKey.Key...)
 			break
@@ -1173,6 +1172,7 @@
 		if err := sstWriter.Put(unsafeKey, unsafeValue); err != nil {
 			return nil, roachpb.BulkOpSummary{}, nil, errors.Wrapf(err, "adding key %s", unsafeKey)
 		}
+		newSize := curSize + int64(len(unsafeKey.Key)+len(unsafeValue))
 		rows.BulkOpSummary.DataSize = newSize
 	}
 
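The sketch below shows how a caller might paginate an export under the new
targetSize semantics, mirroring the loop in pkg/ccl/storageccl/export_test.go.
It is illustrative only: exportPaginated is a hypothetical helper that is not
part of this change, the import paths are assumed, and the return types of
ExportToSst are taken from the test above
([]byte, roachpb.BulkOpSummary, roachpb.Key, error).

package sketch

import (
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage/engine"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// exportPaginated repeatedly calls ExportToSst until no resume key is
// returned. Under the semantics in this patch, every SST returned alongside a
// non-nil resume key has a data size greater than or equal to targetSize; only
// the final SST may be smaller.
func exportPaginated(
	e engine.Reader,
	startKey, endKey roachpb.Key,
	startTS, endTS hlc.Timestamp,
	exportAllRevisions bool,
	targetSize uint64,
	io engine.IterOptions,
) ([][]byte, error) {
	var ssts [][]byte
	for start := startKey; start != nil; {
		sst, _, resume, err := e.ExportToSst(
			start, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
		if err != nil {
			return nil, err
		}
		ssts = append(ssts, sst)
		// A nil resume key means the key range has been exhausted.
		start = resume
	}
	return ssts, nil
}

Because each paginated SST now meets or exceeds targetSize, a DistSender-level
total byte limit equal to the file byte limit yields a single file rather than
a nearly full file plus a tiny remainder.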