Skip to content

Commit

Permalink
sqlccl: rework split+scatter+import concurrency in RESTORE
Browse files Browse the repository at this point in the history
This was largely motivated by how long it takes to presplit in very
large restores. Doing all the splits upfront required rate-limiting, but
no single rate was well-tuned for every cluster. Additionally, even at a
high presplit rate of 100, a 10 TB cluster would take 52 minutes before
it even started scattering.

Now, one goroutine iterates through every span being imported,
presplitting and scattering each one before moving on to the next. After
its split+scatter, each span is sent into a buffered channel read by the
Import goroutines, which prevents the split+scatter goroutine from
getting too far ahead of the Imports. This both acts as a natural rate
limiter for the splits and bounds the number of empty ranges created if
a RESTORE fails or is cancelled.

Overall tpch-10 RESTORE time remains 12:30 on a 4-node cluster.

Since each range is now scattered individually, we no longer need the
jitter in the scatter implementation (plus it now slows down the
RESTORE), so it's removed.

Restore really needs a refactor, but I'm going to be making a couple
more changes leading up to 1.1 so I'll leave cleanup until after they go
in.

This removes most tunable constants in RESTORE and the remaining ones
are defined in terms of the number of nodes in the cluster and the
number of cpus on a node, so this:

Closes #14798.
  • Loading branch information
danhhz committed Jul 25, 2017
1 parent cfabc0c commit 7ca57b6
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 301 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/sqlccl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func ValidatePreviousBackups(ctx context.Context, uris []string) (hlc.Timestamp,
// This reuses Restore's logic for lining up all the start and end
// timestamps to validate the previous backups that this one is incremental
// from.
_, endTime, err := makeImportRequests(nil, backups)
_, endTime, err := makeImportSpans(nil, backups)
return endTime, err
}

Expand Down
60 changes: 0 additions & 60 deletions pkg/ccl/sqlccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/ccl/sqlccl"
"github.com/cockroachdb/cockroach/pkg/ccl/storageccl"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
Expand All @@ -49,7 +48,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
Expand Down Expand Up @@ -1481,64 +1479,6 @@ func TestTimestampMismatch(t *testing.T) {
})
}

// TestPresplitRanges runs sqlccl.PresplitRanges with several split-point
// counts and then scans the meta2 range descriptors to confirm that a range
// boundary exists at every requested key.
func TestPresplitRanges(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx, _, tc, _, cleanupFn := backupRestoreTestSetup(t, multiNode, 0)
	defer cleanupFn()
	kvDB := tc.Server(0).KVClient().(*client.DB)

	for idx, count := range []int{0, 1, 2, 3, 4, 10} {
		t.Run(strconv.Itoa(count), func(t *testing.T) {
			// Every subtest derives its keys from a table prefix offset by
			// the subtest's index.
			prefix := keys.MakeTablePrefix(uint32(keys.MaxReservedDescID + idx))
			var splits []roachpb.Key
			for n := 0; n < count; n++ {
				// Copy the prefix before encoding so the appended suffix never
				// writes into the shared prefix slice.
				splits = append(
					splits,
					encoding.EncodeUvarintAscending(append([]byte(nil), prefix...), uint64(n)),
				)
			}
			if err := sqlccl.PresplitRanges(ctx, *kvDB, splits); err != nil {
				t.Error(err)
			}

			// Check that each split is present. PresplitRanges appends the row
			// sentinel to form a valid table key, while AdminSplit strips it
			// again internally (via EnsureSafeSplitKey), so the resulting range
			// boundaries should equal the requested split points exactly.
			for _, want := range splits {
				addr, err := keys.Addr(want)
				if err != nil {
					t.Fatal(err)
				}

				// Fetch the first meta2 entry at or after the split key's meta
				// key.
				rows, err := kvDB.Scan(
					context.Background(),
					keys.RangeMetaKey(addr),
					keys.Meta2Prefix.PrefixEnd(),
					1,
				)
				if err != nil {
					t.Fatal(err)
				}
				if len(rows) != 1 {
					t.Fatalf("expected 1 KV, got %v", rows)
				}

				var desc roachpb.RangeDescriptor
				if err := rows[0].ValueProto(&desc); err != nil {
					t.Fatal(err)
				}
				// The split exists when the descriptor ends at the split key.
				if desc.EndKey.Equal(addr) {
					continue
				}
				t.Errorf(
					"missing split %s: range %s to %s",
					keys.PrettyPrint(want),
					keys.PrettyPrint(desc.StartKey.AsRawKey()),
					keys.PrettyPrint(desc.EndKey.AsRawKey()),
				)
			}
		})
	}
}

func TestBackupLevelDB(t *testing.T) {
defer leaktest.AfterTest(t)()

Expand Down
Loading

0 comments on commit 7ca57b6

Please sign in to comment.