Skip to content

Commit

Permalink
Use CRC64 as checksum for LogFile. Refine some testing codes
Browse files Browse the repository at this point in the history
Signed-off-by: JaySon-Huang <jayson.hjs@gmail.com>
  • Loading branch information
JaySon-Huang committed Mar 14, 2022
1 parent eef6342 commit 37ff827
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 24 deletions.
7 changes: 4 additions & 3 deletions dbms/src/Storages/Page/V3/LogFile/LogFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ static constexpr UInt8 MaxRecordType = RecyclableLastType;
static constexpr UInt32 BLOCK_SIZE = 32 * 1024;
static_assert(BLOCK_SIZE < std::numeric_limits<UInt16>::max());

using ChecksumClass = Digest::CRC32; // TODO: CRC64
// CRC32 or CRC64; City128, XXH3 is not well tested for LogFile
using ChecksumClass = Digest::CRC64;

using ChecksumType = ChecksumClass::HashType;

static constexpr UInt32 CHECKSUM_FIELD_SIZE = sizeof(ChecksumType);
static constexpr UInt32 CHECKSUM_FIELD_SIZE = ChecksumClass::hash_size;

// If the size of payload is larger than `BLOCK_SIZE`, it will be splited into
// If the size of payload is larger than `BLOCK_SIZE`, it will be splitted into
// fragments. So `PAYLOAD_FIELD_SIZE` must be fit in UInt16.
static constexpr UInt32 PAYLOAD_FIELD_SIZE = sizeof(UInt16);

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/LogFile/LogReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ UInt8 LogReader::readPhysicalRecord(std::string_view * result, size_t * drop_siz
}
else if (err != 0)
return err;
// else parse header successe.
// else parse header success.

if (verify_checksum)
{
Expand Down
12 changes: 6 additions & 6 deletions dbms/src/Storages/Page/V3/LogFile/LogWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,31 @@ namespace PS::V3
* +-----+-------------+--+----+----------+------+-- ... ----+
* File | r0 | r1 |P | r2 | r3 | r4 | |
* +-----+-------------+--+----+----------+------+-- ... ----+
* <--- kBlockSize ------>|<-- kBlockSize ------>|
* <---- BlockSize ------>|<--- BlockSize ------>|
* rn = variable size records
* P = Padding
*
* Data is written out in kBlockSize chunks. If next record does not fit
* Data is written out in BlockSize chunks. If next record does not fit
* into the space left, the leftover space will be padded with \0.
*
* Legacy record format:
*
* +--------------+-----------+-----------+--- ... ---+
* |CheckSum (4B) | Size (2B) | Type (1B) | Payload |
* |CheckSum (8B) | Size (2B) | Type (1B) | Payload |
* +--------------+-----------+-----------+--- ... ---+
*
* CheckSum = 32bit hash computed over the record type and payload using checksum algo
* CheckSum = 64bit hash computed over the record type and payload using checksum algo (CRC64)
* Size = Length of the payload data
* Type = Type of record
* (kZeroType, kFullType, kFirstType, kLastType, kMiddleType )
* (ZeroType, FullType, FirstType, LastType, MiddleType)
* The type is used to group a bunch of records together to represent
* blocks that are larger than kBlockSize
* Payload = Byte stream as long as specified by the payload size
*
* Recyclable record format:
*
* +--------------+-----------+-----------+----------------+--- ... ---+
* |CheckSum (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload |
* |CheckSum (8B) | Size (2B) | Type (1B) | Log number (4B)| Payload |
* +--------------+-----------+-----------+----------------+--- ... ---+
*
* Same as above, with the addition of
Expand Down
26 changes: 12 additions & 14 deletions dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,19 +202,17 @@ class LogFileRWTest : public ::testing::TestWithParam<std::tuple<bool, bool>>
PageUtil::ftruncateFile(wr_file, writtenBytes() - bytes);
}

void fixChecksum(int header_offset, int len, bool recyclable)
void fixChecksum(int header_offset, int payload_len, bool recyclable)
{
// Compute crc of type/len/data
int header_size = recyclable ? Format::RECYCLABLE_HEADER_SIZE : Format::HEADER_SIZE;
Digest::CRC32 digest;
Format::ChecksumClass digest;

size_t crc_buff_size = header_size - Format::CHECKSUM_START_OFFSET + len;
size_t crc_buff_size = header_size - Format::CHECKSUM_START_OFFSET + payload_len;
char crc_buff[crc_buff_size];
PageUtil::readFile(wr_file, header_offset + Format::CHECKSUM_START_OFFSET, crc_buff, crc_buff_size, nullptr);

digest.update(
crc_buff,
crc_buff_size);
digest.update(crc_buff, crc_buff_size);

auto checksum = digest.checksum();
PageUtil::writeFile(wr_file, header_offset, reinterpret_cast<char *>(&checksum), sizeof(checksum), nullptr);
Expand Down Expand Up @@ -407,7 +405,7 @@ TEST_P(LogFileRWTest, BadRecordType)
TEST_P(LogFileRWTest, TruncatedTrailingRecordIsIgnored)
{
write("foo");
shrinkSize(4); // Drop all payload as well as a header byte
shrinkSize(3 + sizeof(Format::MaxRecordType)); // Drop all payload as well as a header byte
resetReader();
ASSERT_EQ("EOF", read());
// Truncated last record is ignored, not treated as an error
Expand All @@ -425,7 +423,7 @@ TEST_P(LogFileRWTest, TruncatedTrailingRecordIsNotIgnored)
}

write("foo");
shrinkSize(4); // Drop all payload as well as a header byte
shrinkSize(3 + sizeof(Format::MaxRecordType)); // Drop all payload as well as a header byte
resetReader(WALRecoveryMode::AbsoluteConsistency);
ASSERT_EQ("EOF", read());
// Truncated last record is ignored, not treated as an error
Expand All @@ -447,8 +445,8 @@ TEST_P(LogFileRWTest, BadLength)
write(repeatedString("bar", payload_size));
write("foo");
resetReader();
// Least significant size byte is stored in header[4].
incrementByte(4, 1);
// Least significant size byte is stored in header[SizePos].
incrementByte(Format::CHECKSUM_FIELD_SIZE, 1);
if (!recyclable_log)
{
ASSERT_EQ("foo", read());
Expand Down Expand Up @@ -498,11 +496,11 @@ TEST_P(LogFileRWTest, BadLengthAtEndIsNotIgnored)
TEST_P(LogFileRWTest, ChecksumMismatch)
{
write("foooooo");
incrementByte(0, 14);
incrementByte(0, Format::HEADER_SIZE + 7);
ASSERT_EQ("EOF", read());
if (!recyclable_log)
{
ASSERT_EQ(14, droppedBytes());
ASSERT_EQ(Format::HEADER_SIZE + 7, droppedBytes());
ASSERT_EQ("OK", matchError("checksum mismatch"));
}
else
Expand Down Expand Up @@ -560,7 +558,7 @@ TEST_P(LogFileRWTest, MissingLastIsIgnored)
{
write(repeatedString("bar", PS::V3::Format::BLOCK_SIZE));
// Remove the LAST block, including header.
shrinkSize(14);
shrinkSize(2 * (recyclable_log ? Format::RECYCLABLE_HEADER_SIZE : Format::HEADER_SIZE));
ASSERT_EQ("EOF", read());
ASSERT_EQ("", reportMessage());
ASSERT_EQ(0, droppedBytes());
Expand All @@ -577,7 +575,7 @@ TEST_P(LogFileRWTest, MissingLastIsNotIgnored)
resetReader(WALRecoveryMode::AbsoluteConsistency);
write(repeatedString("bar", PS::V3::Format::BLOCK_SIZE));
// Remove the LAST block, including header.
shrinkSize(14);
shrinkSize(2 * (recyclable_log ? Format::RECYCLABLE_HEADER_SIZE : Format::HEADER_SIZE));
ASSERT_EQ("EOF", read());
ASSERT_GT(droppedBytes(), 0);
ASSERT_EQ("OK", matchError("Corruption: error reading trailing data"));
Expand Down

0 comments on commit 37ff827

Please sign in to comment.