Skip to content

Commit

Permalink
Merge pull request #262 from BigVan/zfile_digest
Browse files Browse the repository at this point in the history
save digest of zfile's header/trailer and index
  • Loading branch information
yuchen0cc authored Sep 18, 2023
2 parents 3a3b2c7 + 7315c31 commit 2fb5fbf
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 12 deletions.
15 changes: 9 additions & 6 deletions src/overlaybd/zfile/format_spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ The format of header is described as below. All fields are little-endian.
| :---: | :----: | :----: | :--- |
| magic0 | 0 | 8 | "ZFile\0\1" (and an implicit '\0') |
| magic1 | 8 | 16 | 74 75 6A 69, 2E 79 79 66, 40 41 6C 69, 62 61 62 61 |
| size | 24 | uint32_t | size of the header struct (108), excluding the tail padding |
| reserved| 28 | 4 | reserved space, should be 0 |
| size | 24 | uint32_t | size of the header structure, excluding the tail padding |
| digest | 28 | uint32_t | CRC32C checksum of the whole header/trailer space (bytes 0-511), computed with this field set to 0 |
| flags | 32 | uint64_t | bits for flags* (see later for details) |
| index_offset | 40 | uint64_t | index offset |
| index_size | 48 | uint64_t | size of the index section, possibly compressed|
| index_size | 48 | uint64_t | size of the index section, possibly compressed depending on flags |
| original_file_size | 56 | uint64_t | size of the original file before compression |
| reserved| 64 | 8 | reserved space, should be 0 |
| block_size | 72 | uint32_t | size of each compression block |
| index_crc | 64 | uint32_t | checksum value of index |
| reserved| 68 | 4 | reserved space, should be 0 |
| block_size| 72 | uint32_t | size of each compression block |
| algo | 76 | uint8_t | compression algorithm |
| level | 77 | uint8_t | compression level |
| use_dict| 78 | bool | whether use dictionary |
Expand All @@ -45,7 +46,9 @@ The format of header is described as below. All fields are little-endian.
| type | 1 | this is a data file (1) or index file (0) |
| sealed | 2 | this file is sealed (1) or not (0) |
| info_valid | 3 | information validity of the fields *after* flags (they were initially invalid (0) after creation; and readers must resort to trailer when they meet such headers) |
| reserved | 4~63 | reserved for future use; must be 0s |
| digest | 4 | the digest of this header/trailer has been recorded in the digest field |
| index_compression | 5 | whether the index has been compressed (1) or not (0) |
| reserved | 6~63 | reserved for future use; must be 0s |


## index
Expand Down
32 changes: 32 additions & 0 deletions src/overlaybd/zfile/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,38 @@ TEST_F(ZFileTest, validation_check) {
EXPECT_NE(zfile_validation_check(fdst.get()), 0);
}

// Header/trailer digest check: compress a small random file with digest
// enabled, overwrite a few bytes at offset 400 of the resulting zfile (inside
// the header region according to the format spec), and expect both
// zfile_validation_check() and is_zfile() to report the corruption.
TEST_F(ZFileTest, ht_check) {
    // log_output_level = 1;
    auto fn_src = "verify.data";
    auto fn_zfile = "verify.zfile";

    auto src = lfs->open(fn_src, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644);
    unique_ptr<IFile> fsrc(src);
    // Abort the test on setup failure instead of dereferencing a null file.
    ASSERT_TRUE(src != nullptr) << "failed to open " << fn_src << ": " << errno << "("
                                << strerror(errno) << ")";
    randwrite(fsrc.get(), 1024);
    struct stat _st;
    // A failed fstat must fail the test rather than let it pass silently.
    ASSERT_EQ(fsrc->fstat(&_st), 0) << "fstat failed: " << errno << "(" << strerror(errno) << ")";

    auto dst = lfs->open(fn_zfile, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644);
    unique_ptr<IFile> fdst(dst);
    ASSERT_TRUE(dst != nullptr) << "failed to open " << fn_zfile << ": " << errno << "("
                                << strerror(errno) << ")";

    CompressOptions opt;
    opt.algo = CompressOptions::LZ4;
    opt.verify = 1;
    CompressArgs args(opt);
    int ret = zfile_compress(fsrc.get(), fdst.get(), &args);
    EXPECT_EQ(ret, 0);

    // Corrupt the stored header; make sure the corruption was actually written,
    // otherwise the following expectations would be meaningless.
    auto x = 2324;
    EXPECT_EQ(fdst->pwrite(&x, sizeof(x), 400), static_cast<ssize_t>(sizeof(x)));

    EXPECT_NE(zfile_validation_check(fdst.get()), 0);
    EXPECT_EQ(is_zfile(fdst.get()), -1);
}

TEST_F(ZFileTest, dsa) {
const int buf_size = 1024;
const int crc_count = 3000;
Expand Down
53 changes: 47 additions & 6 deletions src/overlaybd/zfile/zfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,16 @@ class CompressionFile : public VirtualReadOnlyFile {

// offset 24, 28, 32
uint32_t size = sizeof(HeaderTrailer);
uint32_t __padding = 0;
// uint32_t __padding = 0;
uint32_t digest = 0;
uint64_t flags;

// Bit positions (shift amounts) of the individual flags packed into `flags`.
static const uint32_t FLAG_SHIFT_HEADER = 0; // 1:header 0:trailer
static const uint32_t FLAG_SHIFT_TYPE = 1; // 1:data file, 0:index file
static const uint32_t FLAG_SHIFT_SEALED = 2; // 1:YES, 0:NO
static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3;
static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3; // overwrite trailer info to header
static const uint32_t FLAG_SHIFT_CALC_DIGEST = 4; // calculate digest for zfile header/trailer and jumptable
static const uint32_t FLAG_SHIFT_IDX_COMP = 5; // compress zfile index(jumptable)

uint32_t get_flag_bit(uint32_t shift) const {
return flags & (1 << shift);
Expand Down Expand Up @@ -121,6 +124,17 @@ class CompressionFile : public VirtualReadOnlyFile {
// Reports whether the SEALED flag bit is set in `flags`.
bool is_sealed() const {
    const bool sealed = get_flag_bit(FLAG_SHIFT_SEALED);
    return sealed;
}
bool is_digest_enabled() {
return get_flag_bit(FLAG_SHIFT_CALC_DIGEST);
}
// Verifies the stored digest of this header/trailer.
// Returns true when the digest flag is not set (nothing to verify) or when the
// crc32c computed over SPACE bytes starting at `this`, with the `digest` field
// temporarily zeroed, equals the stored digest. The stored digest is put back
// before returning.
bool is_valid() {
    if (is_digest_enabled()) {
        const auto expected = this->digest;
        // The digest field itself must be zero while the checksum is computed.
        this->digest = 0;
        const auto actual = crc32::crc32c(this, CompressionFile::HeaderTrailer::SPACE);
        this->digest = expected;
        return actual == expected;
    }
    return true;
}
void set_header() {
set_flag_bit(FLAG_SHIFT_HEADER);
}
Expand All @@ -143,6 +157,14 @@ class CompressionFile : public VirtualReadOnlyFile {
set_flag_bit(FLAG_SHIFT_HEADER_OVERWRITE);
}

void set_digest_enable() {
set_flag_bit(FLAG_SHIFT_CALC_DIGEST);
}

void set_compress_index() {
set_flag_bit(FLAG_SHIFT_IDX_COMP);
}

void set_compress_option(const CompressOptions &opt) {
this->opt = opt;
}
Expand All @@ -151,7 +173,8 @@ class CompressionFile : public VirtualReadOnlyFile {
uint64_t index_offset; // in bytes
uint64_t index_size; // # of SegmentMappings
uint64_t original_file_size; // filled from the raw (uncompressed) data size by the writers
uint64_t reserved_0;
uint32_t index_crc; // crc32c of the index (jump table) bytes; checked on load when the digest flag is set
uint32_t reserved_0;
// offset 72
CompressOptions opt; // compression options, assigned via set_compress_option()

Expand Down Expand Up @@ -591,6 +614,7 @@ class ZFileBuilder : public VirtualReadOnlyFile {
LOG_ERRNO_RETURN(0, -1, "failed to write index.");
}
auto pht = (CompressionFile::HeaderTrailer *)m_ht;
pht->index_crc = crc32::crc32c(&m_block_len[0], index_bytes);
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->original_file_size = raw_data_size;
Expand Down Expand Up @@ -681,7 +705,9 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile
if (!pht->verify_magic() || !pht->is_header()) {
LOG_ERROR_RETURN(0, false, "header magic/type don't match");
}

if (pht->is_valid() == false) {
LOG_ERROR_RETURN(0, false, "digest verification failed.");
}
struct stat stat;
ret = file->fstat(&stat);
if (ret < 0) {
Expand Down Expand Up @@ -727,12 +753,20 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile
if (ret < (ssize_t)index_bytes) {
LOG_ERRNO_RETURN(0, false, "failed to read index");
}
if (pht->is_digest_enabled()) {
LOG_INFO("check jumptable CRC32 (` expected)", pht->index_crc);
auto crc = crc32::crc32c(ibuf.get(), index_bytes);
if (crc != pht->index_crc) {
LOG_ERRNO_RETURN(0, false, "checksum of jumptable is incorrect");
}
}
ret = jump_table.build(ibuf.get(), pht->index_size,
CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size,
pht->opt.block_size, pht->opt.verify);
if (ret != 0) {
LOG_ERRNO_RETURN(0, false, "failed to build jump table");
}

if (pheader_trailer)
*pheader_trailer = *pht;
return true;
Expand Down Expand Up @@ -793,7 +827,10 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo
if (offset != -1)
pht->set_header_overwrite();

LOG_INFO("pht->opt.dict_size: `", pht->opt.dict_size);
pht->set_digest_enable(); // by default
pht->digest = 0;
pht->digest = crc32::crc32c(pht, CompressionFile::HeaderTrailer::SPACE);
LOG_INFO("save header/trailer with digest: `", pht->digest);
if (offset == -1) {
return (int)file->write(pht, CompressionFile::HeaderTrailer::SPACE);
}
Expand All @@ -818,7 +855,6 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
char buf[CompressionFile::HeaderTrailer::SPACE] = {};
auto pht = new (buf) CompressionFile::HeaderTrailer;
pht->set_compress_option(opt);

LOG_INFO("write header.");
auto ret = write_header_trailer(as, true, false, true, pht);
if (ret < 0) {
Expand Down Expand Up @@ -890,6 +926,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
if (as->write(&block_len[0], index_bytes) != index_bytes) {
LOG_ERRNO_RETURN(0, -1, "failed to write index.");
}
pht->index_crc = crc32::crc32c(&block_len[0], index_bytes);
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->original_file_size = raw_data_size;
Expand Down Expand Up @@ -968,6 +1005,10 @@ int is_zfile(IFile *file) {
LOG_DEBUG("file: ` is not a zfile object", file);
return 0;
}
if (!pht->is_valid()) {
LOG_ERRNO_RETURN(0, -1,
"file: ` is a zfile object but verify digest failed.", file);
}
LOG_DEBUG("file: ` is a zfile object", file);
return 1;
}
Expand Down

0 comments on commit 2fb5fbf

Please sign in to comment.