-
Notifications
You must be signed in to change notification settings - Fork 456
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
*: infer
sstable.TableFormat
from FormatMajorVersion
The [Pebble SSTable Format Versions RFC][1] outlines a new versioning scheme used for new SSTable-level features. The RFC also outlines a requirement that external callers (i.e. Cockroach) ensure that the format version is compatible with other stores in a cluster. To ensure tables are created with an explicit version, the `(*pebble.Options).NewWriterOptions` method now takes an additional parameter that specifies the format version to be used when writing a table. Previously, the table format was pinned at RocksDBv2. Expose a `(FormatMajorVersion).MaxTableFormat()` method that returns the maximum allowable table format for the given `FormatMajorVersion`. This can be used to ensure that given a store major version, an `sstable.Writer` is capped at writing tables with a maximum table format, to ensure compatibility with other stores running at the same version. New `sstable.Writer`s within Pebble itself (in contrast to `Writer`s created _external_ to a Pebble store, e.g. Cockroach generating a table for backup or ingestion) select the table format by consulting the current value of `(*DB).FormatMajorVersion()` and then infer the maximum allowable version from `(FormatMajorVersion).MaxTableFormat()`. Update the table footer encode and decode functions to take into account the new Pebble magic string. This new magic string implies a Pebble-specific versioning scheme (with v1 for block properties and v2 for range keys) with the same table footer format used in RocksDBv2. Add a compatibility check to the table write path to assert that the presence of specific features on a table must be accompanied by the appropriate table format (e.g. block properties require a table format of at least PebbleDBv1). Update existing tests that need to make use of newer Pebble table features (i.e. block properties and/or range keys) to explicitly set the table format to opt into the newer features. [1]: ./docs/RFCS/20220112_pebble_sstable_format_versions.md
- Loading branch information
Showing
15 changed files
with
372 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
package sstable | ||
|
||
import ( | ||
"github.com/cockroachdb/errors" | ||
"github.com/cockroachdb/pebble/internal/base" | ||
) | ||
|
||
// TableFormat specifies the format version for sstables. The legacy LevelDB | ||
// format is format version 1. | ||
type TableFormat uint32 | ||
|
||
// The available table formats, representing the tuple (magic number, version | ||
// number). Note that these values are not (and should not be) serialized to | ||
// disk. The ordering should follow the order the versions were introduced to | ||
// Pebble (i.e. the history is linear). | ||
const ( | ||
TableFormatUnspecified TableFormat = iota | ||
TableFormatLevelDB | ||
TableFormatRocksDBv2 | ||
TableFormatPebblev1 // Block properties. | ||
TableFormatPebblev2 // Range keys. | ||
|
||
TableFormatMax = TableFormatPebblev2 | ||
) | ||
|
||
// ParseTableFormat parses the given magic bytes and version into its | ||
// corresponding internal TableFormat. | ||
func ParseTableFormat(magic []byte, version uint32) (TableFormat, error) { | ||
switch string(magic) { | ||
case levelDBMagic: | ||
return TableFormatLevelDB, nil | ||
case rocksDBMagic: | ||
if version != rocksDBFormatVersion2 { | ||
return TableFormatUnspecified, base.CorruptionErrorf( | ||
"pebble/table: unsupported rocksdb format version %d", errors.Safe(version), | ||
) | ||
} | ||
return TableFormatRocksDBv2, nil | ||
case pebbleDBMagic: | ||
switch version { | ||
case 1: | ||
return TableFormatPebblev1, nil | ||
case 2: | ||
return TableFormatPebblev2, nil | ||
default: | ||
return TableFormatUnspecified, base.CorruptionErrorf( | ||
"pebble/table: unsupported pebble format version %d", errors.Safe(version), | ||
) | ||
} | ||
default: | ||
return TableFormatUnspecified, base.CorruptionErrorf( | ||
"pebble/table: invalid table (bad magic number)", | ||
) | ||
} | ||
} | ||
|
||
// AsTuple returns the TableFormat's (Magic String, Version) tuple. | ||
func (f TableFormat) AsTuple() (string, uint32) { | ||
switch f { | ||
case TableFormatLevelDB: | ||
return levelDBMagic, 0 | ||
case TableFormatRocksDBv2: | ||
return rocksDBMagic, 2 | ||
case TableFormatPebblev1: | ||
return pebbleDBMagic, 1 | ||
case TableFormatPebblev2: | ||
return pebbleDBMagic, 2 | ||
default: | ||
panic("sstable: unknown table format version tuple") | ||
} | ||
} | ||
|
||
// String returns the TableFormat (Magic String,Version) tuple. | ||
func (f TableFormat) String() string { | ||
switch f { | ||
case TableFormatLevelDB: | ||
return "(LevelDB)" | ||
case TableFormatRocksDBv2: | ||
return "(RocksDB,v2)" | ||
case TableFormatPebblev1: | ||
return "(Pebble,v1)" | ||
case TableFormatPebblev2: | ||
return "(Pebble,v2)" | ||
default: | ||
panic("sstable: unknown table format version tuple") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
package sstable | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestTableFormat_RoundTrip(t *testing.T) { | ||
tcs := []struct { | ||
name string | ||
magic string | ||
version uint32 | ||
want TableFormat | ||
wantErr string | ||
}{ | ||
// Valid cases. | ||
{ | ||
name: "LevelDB", | ||
magic: levelDBMagic, | ||
version: 0, | ||
want: TableFormatLevelDB, | ||
}, | ||
{ | ||
name: "RocksDBv2", | ||
magic: rocksDBMagic, | ||
version: 2, | ||
want: TableFormatRocksDBv2, | ||
}, | ||
{ | ||
name: "PebbleDBv1", | ||
magic: pebbleDBMagic, | ||
version: 1, | ||
want: TableFormatPebblev1, | ||
}, | ||
{ | ||
name: "PebbleDBv2", | ||
magic: pebbleDBMagic, | ||
version: 2, | ||
want: TableFormatPebblev2, | ||
}, | ||
// Invalid cases. | ||
{ | ||
name: "Invalid RocksDB version", | ||
magic: rocksDBMagic, | ||
version: 1, | ||
wantErr: "pebble/table: unsupported rocksdb format version 1", | ||
}, | ||
{ | ||
name: "Invalid PebbleDB version", | ||
magic: pebbleDBMagic, | ||
version: 3, | ||
wantErr: "pebble/table: unsupported pebble format version 3", | ||
}, | ||
{ | ||
name: "Unknown magic string", | ||
magic: "foo", | ||
wantErr: "pebble/table: invalid table (bad magic number)", | ||
}, | ||
} | ||
|
||
for _, tc := range tcs { | ||
t.Run(tc.name, func(t *testing.T) { | ||
// Tuple -> TableFormat. | ||
f, err := ParseTableFormat([]byte(tc.magic), tc.version) | ||
if tc.wantErr != "" { | ||
require.Error(t, err) | ||
require.Equal(t, tc.wantErr, err.Error()) | ||
return | ||
} | ||
require.NoError(t, err) | ||
require.Equal(t, tc.want, f) | ||
|
||
// TableFormat -> Tuple. | ||
s, v := f.AsTuple() | ||
require.Equal(t, tc.magic, s) | ||
require.Equal(t, tc.version, v) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.