diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 11a757fd68d..bbc0fe4cf3d 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -552,7 +552,8 @@ class CompactionJobTestBase : public testing::Test { /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", /*error_handler=*/nullptr, /*read_only=*/false)); compaction_job_stats_.Reset(); - ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); + ASSERT_OK( + SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); VersionEdit new_db; new_db.SetLogNumber(0); @@ -575,7 +576,8 @@ class CompactionJobTestBase : public testing::Test { } ASSERT_OK(s); // Make "CURRENT" file that points to the new manifest file. - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + Temperature::kUnknown, nullptr); ASSERT_OK(s); diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index dd4bf411cd8..0db7293682d 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -970,7 +970,9 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only, } // Persist it to IDENTITY file if allowed if (!read_only) { - s = SetIdentityFile(write_options, env_, dbname_, db_id_); + s = SetIdentityFile(write_options, env_, dbname_, + immutable_db_options_.metadata_write_temperature, + db_id_); } return s; } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 8ef024e7ba8..a58a142d712 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -295,7 +295,8 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { Status DBImpl::NewDB(std::vector* new_filenames) { VersionEdit new_db; const WriteOptions write_options(Env::IOActivity::kDBOpen); - Status s = SetIdentityFile(write_options, env_, dbname_); + Status s = SetIdentityFile(write_options, env_, dbname_, + immutable_db_options_.metadata_write_temperature); if (!s.ok()) { return s; } @@ -319,6 +320,12 @@ Status DBImpl::NewDB(std::vector* new_filenames) { } std::unique_ptr file; FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_); + // DB option takes precedence when not kUnknown + if (immutable_db_options_.metadata_write_temperature != + Temperature::kUnknown) { + file_options.temperature = + immutable_db_options_.metadata_write_temperature; + } s = NewWritableFile(fs_.get(), manifest, &file, file_options); if (!s.ok()) { return s; @@ -344,6 +351,7 @@ Status DBImpl::NewDB(std::vector* new_filenames) { if (s.ok()) { // Make "CURRENT" file that points to the new manifest file. s = SetCurrentFile(write_options, fs_.get(), dbname_, 1, + immutable_db_options_.metadata_write_temperature, directories_.GetDbDir()); if (new_filenames) { new_filenames->emplace_back( @@ -1936,6 +1944,10 @@ IOStatus DBImpl::CreateWAL(const WriteOptions& write_options, BuildDBOptions(immutable_db_options_, mutable_db_options_); FileOptions opt_file_options = fs_->OptimizeForLogWrite(file_options_, db_options); + // DB option takes precedence when not kUnknown + if (immutable_db_options_.wal_write_temperature != Temperature::kUnknown) { + opt_file_options.temperature = immutable_db_options_.wal_write_temperature; + } std::string wal_dir = immutable_db_options_.GetWalDir(); std::string log_fname = LogFileName(wal_dir, log_file_num); diff --git a/db/db_test2.cc b/db/db_test2.cc index e6a3adf9b8b..f380144c6ab 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "db/db_test_util.h" @@ -26,6 +27,7 @@ #include "rocksdb/utilities/replayer.h" #include "rocksdb/wal_filter.h" #include "test_util/testutil.h" +#include "util/defer.h" #include "util/random.h" #include "utilities/fault_injection_env.h" @@ -6544,6 +6546,234 @@ TEST_P(RenameCurrentTest, Compaction) { ASSERT_EQ("d_value", Get("d")); } +TEST_F(DBTest2, VariousFileTemperatures) { + constexpr size_t kNumberFileTypes = static_cast(kBlobFile) + 1U; + + struct MyTestFS : public FileTemperatureTestFS { + explicit MyTestFS(const std::shared_ptr& fs) + : FileTemperatureTestFS(fs) { + Reset(); + } + + IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts, + std::unique_ptr* result, + IODebugContext* dbg) override { + IOStatus ios = + FileTemperatureTestFS::NewWritableFile(fname, opts, result, dbg); + if (ios.ok()) { + uint64_t number; + FileType type; + if (ParseFileName(GetFileName(fname), &number, "LOG", &type)) { + if (type == kTableFile) { + // Not checked here + } else if (type == kWalFile) { + if (opts.temperature != expected_wal_temperature) { + std::cerr << "Attempt to open " << fname << " with temperature " + << temperature_to_string[opts.temperature] + << " rather than " + << temperature_to_string[expected_wal_temperature] + << std::endl; + assert(false); + } + } else if (type == kDescriptorFile) { + if (opts.temperature != expected_manifest_temperature) { + std::cerr << "Attempt to open " << fname << " with temperature " + << temperature_to_string[opts.temperature] + << " rather than " + << temperature_to_string[expected_wal_temperature] + << std::endl; + assert(false); + } + } else if (opts.temperature != expected_other_metadata_temperature) { + std::cerr << "Attempt to open " << fname << " with temperature " + << temperature_to_string[opts.temperature] + << " rather than " + << temperature_to_string[expected_wal_temperature] + << std::endl; + assert(false); + } + UpdateCount(type, 1); + } + } + return ios; + } + + IOStatus RenameFile(const std::string& src, const std::string& dst, + const IOOptions& options, + IODebugContext* dbg) override { + IOStatus ios = FileTemperatureTestFS::RenameFile(src, dst, options, dbg); + if (ios.ok()) { + uint64_t number; + FileType src_type; + FileType dst_type; + assert(ParseFileName(GetFileName(src), &number, "LOG", &src_type)); + assert(ParseFileName(GetFileName(dst), &number, "LOG", &dst_type)); + + UpdateCount(src_type, -1); + UpdateCount(dst_type, 1); + } + return ios; + } + + void UpdateCount(FileType type, int delta) { + size_t i = static_cast(type); + assert(i < kNumberFileTypes); + counts[i].FetchAddRelaxed(delta); + } + + std::map PopCounts() { + std::map ret; + for (size_t i = 0; i < kNumberFileTypes; ++i) { + int c = counts[i].ExchangeRelaxed(0); + if (c > 0) { + ret[static_cast(i)] = c; + } + } + return ret; + } + + FileOptions OptimizeForLogWrite( + const FileOptions& file_options, + const DBOptions& /*db_options*/) const override { + FileOptions opts = file_options; + if (optimize_wal_temperature != Temperature::kUnknown) { + opts.temperature = optimize_wal_temperature; + } + return opts; + } + + FileOptions OptimizeForManifestWrite( + const FileOptions& file_options) const override { + FileOptions opts = file_options; + if (optimize_manifest_temperature != Temperature::kUnknown) { + opts.temperature = optimize_manifest_temperature; + } + return opts; + } + + void Reset() { + optimize_manifest_temperature = Temperature::kUnknown; + optimize_wal_temperature = Temperature::kUnknown; + expected_manifest_temperature = Temperature::kUnknown; + expected_other_metadata_temperature = Temperature::kUnknown; + expected_wal_temperature = Temperature::kUnknown; + for (auto& c : counts) { + c.StoreRelaxed(0); + } + } + + Temperature optimize_manifest_temperature; + Temperature optimize_wal_temperature; + Temperature expected_manifest_temperature; + Temperature expected_other_metadata_temperature; + Temperature expected_wal_temperature; + std::array, kNumberFileTypes> counts; + }; + + // We don't have enough non-unknown temps to confidently distinguish that + // a specific setting caused a specific outcome, in a single run. This is a + // reasonable work-around without blowing up test time. Only returns + // non-unknown temperatures. + auto RandomTemp = [] { + static std::vector temps = { + Temperature::kHot, Temperature::kWarm, Temperature::kCold}; + return temps[Random::GetTLSInstance()->Uniform( + static_cast(temps.size()))]; + }; + + auto test_fs = std::make_shared(env_->GetFileSystem()); + std::unique_ptr env(new CompositeEnvWrapper(env_, test_fs)); + for (bool use_optimize : {false, true}) { + std::cerr << "use_optimize: " << std::to_string(use_optimize) << std::endl; + for (bool use_temp_options : {false, true}) { + std::cerr << "use_temp_options: " << std::to_string(use_temp_options) + << std::endl; + + Options options = CurrentOptions(); + // Currently require for last level temperature + options.compaction_style = kCompactionStyleUniversal; + options.env = env.get(); + test_fs->Reset(); + if (use_optimize) { + test_fs->optimize_manifest_temperature = RandomTemp(); + test_fs->expected_manifest_temperature = + test_fs->optimize_manifest_temperature; + test_fs->optimize_wal_temperature = RandomTemp(); + test_fs->expected_wal_temperature = test_fs->optimize_wal_temperature; + } + if (use_temp_options) { + options.metadata_write_temperature = RandomTemp(); + test_fs->expected_manifest_temperature = + options.metadata_write_temperature; + test_fs->expected_other_metadata_temperature = + options.metadata_write_temperature; + options.wal_write_temperature = RandomTemp(); + test_fs->expected_wal_temperature = options.wal_write_temperature; + options.last_level_temperature = RandomTemp(); + options.default_write_temperature = RandomTemp(); + } + + DestroyAndReopen(options); + Defer closer([&] { Close(); }); + + using FTC = std::map; + // Files on DB startup + ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1}, + {kDescriptorFile, 2}, + {kCurrentFile, 2}, + {kIdentityFile, 1}, + {kOptionsFile, 1}})); + + // Temperature count map + using TCM = std::map; + ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), TCM({})); + + ASSERT_OK(Put("foo", "1")); + ASSERT_OK(Put("bar", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo", "2")); + ASSERT_OK(Put("bar", "2")); + ASSERT_OK(Flush()); + + ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), + TCM({{options.default_write_temperature, 2}})); + + ASSERT_OK(db_->CompactRange({}, nullptr, nullptr)); + + ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), + TCM({{options.last_level_temperature, 1}})); + + ASSERT_OK(Put("foo", "3")); + ASSERT_OK(Put("bar", "3")); + ASSERT_OK(Flush()); + + // Just in memtable/WAL + ASSERT_OK(Put("dog", "3")); + + { + TCM expected; + expected[options.default_write_temperature] += 1; + expected[options.last_level_temperature] += 1; + ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), expected); + } + + // New files during operation + ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 3}, {kTableFile, 4}})); + + Reopen(options); + + // New files during re-open/recovery + ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1}, + {kTableFile, 1}, + {kDescriptorFile, 1}, + {kCurrentFile, 1}, + {kOptionsFile, 1}})); + + Destroy(options); + } + } +} + TEST_F(DBTest2, LastLevelTemperature) { class TestListener : public EventListener { public: diff --git a/db/db_test_util.h b/db/db_test_util.h index 47b1667eac1..36a4615344b 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -831,6 +831,15 @@ class FileTemperatureTestFS : public FileSystemWrapper { return count; } + std::map CountCurrentSstFilesByTemp() { + MutexLock lock(&mu_); + std::map ret; + for (const auto& e : current_sst_file_temperatures_) { + ret[e.second]++; + } + return ret; + } + void OverrideSstFileTemperature(uint64_t number, Temperature temp) { MutexLock lock(&mu_); current_sst_file_temperatures_[number] = temp; @@ -842,7 +851,7 @@ class FileTemperatureTestFS : public FileSystemWrapper { requested_sst_file_temperatures_; std::map current_sst_file_temperatures_; - std::string GetFileName(const std::string& fname) { + static std::string GetFileName(const std::string& fname) { auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1); // workaround only for Windows that the file path could contain both Windows // FilePathSeparator and '/' diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 3ffb77d5378..d407e4815f7 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -68,7 +68,8 @@ class FlushJobTestBase : public testing::Test { } void NewDB() { - ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); + ASSERT_OK( + SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); VersionEdit new_db; new_db.SetLogNumber(0); @@ -114,7 +115,8 @@ class FlushJobTestBase : public testing::Test { } ASSERT_OK(s); // Make "CURRENT" file that points to the new manifest file. - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + Temperature::kUnknown, nullptr); ASSERT_OK(s); } diff --git a/db/version_set.cc b/db/version_set.cc index fec847630a1..e81165a3d2e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5511,6 +5511,10 @@ Status VersionSet::ProcessManifestWrites( std::unique_ptr new_desc_log_ptr; { FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_); + // DB option (in file_options_) takes precedence when not kUnknown + if (file_options_.temperature != Temperature::kUnknown) { + opt_file_opts.temperature = file_options_.temperature; + } mu->Unlock(); TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestStart"); TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WriteManifest", nullptr); @@ -5637,9 +5641,9 @@ Status VersionSet::ProcessManifestWrites( assert(manifest_io_status.ok()); } if (s.ok() && new_descriptor_log) { - io_s = SetCurrentFile(write_options, fs_.get(), dbname_, - pending_manifest_file_number_, - dir_contains_current_file); + io_s = SetCurrentFile( + write_options, fs_.get(), dbname_, pending_manifest_file_number_, + file_options_.temperature, dir_contains_current_file); if (!io_s.ok()) { s = io_s; // Quarantine old manifest file in case new manifest file's CURRENT file diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 94ee5f2e574..4f3665fba66 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1415,16 +1415,22 @@ class VersionSetTestBase { } } + void CreateCurrentFile() { + // Make "CURRENT" file point to the new manifest file. + ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + Temperature::kUnknown, + /* dir_contains_current_file */ nullptr)); + } + // Create DB with 3 column families. void NewDB() { SequenceNumber last_seqno; std::unique_ptr log_writer; - ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); + ASSERT_OK( + SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); PrepareManifest(&column_families_, &last_seqno, &log_writer); log_writer.reset(); - // Make "CURRENT" file point to the new manifest file. - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); EXPECT_OK(versions_->Recover(column_families_, false)); EXPECT_EQ(column_families_.size(), @@ -2600,7 +2606,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); + CreateCurrentFile(); } void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) { @@ -2612,7 +2618,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); + CreateCurrentFile(); } void SetupCorruptedAtomicGroup(int atomic_group_size) { @@ -2626,7 +2632,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, } edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); + CreateCurrentFile(); } void SetupIncorrectAtomicGroup(int atomic_group_size) { @@ -2642,7 +2648,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, } edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); + CreateCurrentFile(); } void SetupTestSyncPoints() { @@ -3408,8 +3414,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { SequenceNumber last_seqno; std::unique_ptr log_writer; PrepareManifest(&column_families, &last_seqno, &log_writer); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); EXPECT_OK(versions_->Recover(column_families, false /* read_only */)); EXPECT_EQ(column_families.size(), @@ -3431,7 +3436,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { cfd_to_drop->Ref(); drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID()); mutex_.Lock(); - s = versions_->LogAndApply( + Status s = versions_->LogAndApply( cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options, write_options, &drop_cf_edit, &mutex_, nullptr); mutex_.Unlock(); @@ -3541,9 +3546,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase, TEST_F(EmptyDefaultCfNewManifest, Recover) { PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); std::vector column_families; @@ -3552,7 +3555,7 @@ TEST_F(EmptyDefaultCfNewManifest, Recover) { cf_options_); std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest( + Status s = versions_->TryRecoverFromOneManifest( manifest_path, column_families, false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); @@ -3573,7 +3576,8 @@ class VersionSetTestEmptyDb assert(nullptr != log_writer); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { - ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); + ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_, + Temperature::kUnknown)); DBOptions tmp_db_options; tmp_db_options.env = env_; std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); @@ -3606,9 +3610,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3623,9 +3625,9 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) { std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); + Status s = versions_->TryRecoverFromOneManifest( + manifest_path, column_families, read_only, &db_id, + &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { @@ -3651,9 +3653,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) { ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3699,9 +3699,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) { ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3758,9 +3756,7 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) { ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3816,9 +3812,7 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) { ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, - /* dir_contains_current_file */ nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); @@ -4025,15 +4019,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); + Status s = versions_->TryRecoverFromOneManifest( + manifest_path, column_families_, + /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_TRUE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { @@ -4083,15 +4076,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); + Status s = versions_->TryRecoverFromOneManifest( + manifest_path, column_families_, + /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_TRUE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { @@ -4137,15 +4129,14 @@ TEST_F(VersionSetTestMissingFiles, NoFileMissing) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); + Status s = versions_->TryRecoverFromOneManifest( + manifest_path, column_families_, + /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { @@ -4266,15 +4257,14 @@ class BestEffortsRecoverIncompleteVersionTest /*cf=*/0, added_files, std::vector>(), blob_files); log_writer_.reset(); - Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); + CreateCurrentFile(); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); + Status s = versions_->TryRecoverFromOneManifest( + manifest_path, column_families_, + /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_TRUE(has_missing_table_file); } diff --git a/env/file_system.cc b/env/file_system.cc index 27c7207f0f5..1f02f7a7eeb 100644 --- a/env/file_system.cc +++ b/env/file_system.cc @@ -181,10 +181,10 @@ FileOptions FileSystem::OptimizeForBlobFileRead( IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, const std::string& fname, bool should_sync, - const IOOptions& io_options) { + const IOOptions& io_options, + const FileOptions& file_options) { std::unique_ptr file; - EnvOptions soptions; - IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr); + IOStatus s = fs->NewWritableFile(fname, file_options, &file, nullptr); if (!s.ok()) { return s; } diff --git a/file/filename.cc b/file/filename.cc index b34a0e113e8..45cbf9d76a9 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -388,6 +388,7 @@ bool ParseFileName(const std::string& fname, uint64_t* number, IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, const std::string& dbname, uint64_t descriptor_number, + Temperature temp, FSDirectory* dir_contains_current_file) { // Remove leading "dbname/" and add newline to manifest file name std::string manifest = DescriptorFileName(dbname, descriptor_number); @@ -397,8 +398,11 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, std::string tmp = TempFileName(dbname, descriptor_number); IOOptions opts; IOStatus s = PrepareIOFromWriteOptions(write_options, opts); + FileOptions file_opts; + file_opts.temperature = temp; if (s.ok()) { - s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts); + s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts, + file_opts); } TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s); if (s.ok()) { @@ -423,7 +427,8 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, } Status SetIdentityFile(const WriteOptions& write_options, Env* env, - const std::string& dbname, const std::string& db_id) { + const std::string& dbname, Temperature temp, + const std::string& db_id) { std::string id; if (db_id.empty()) { id = env->GenerateUniqueId(); @@ -437,8 +442,11 @@ Status SetIdentityFile(const WriteOptions& write_options, Env* env, Status s; IOOptions opts; s = PrepareIOFromWriteOptions(write_options, opts); + FileOptions file_opts; + file_opts.temperature = temp; if (s.ok()) { - s = WriteStringToFile(env, id, tmp, true, &opts); + s = WriteStringToFile(env->GetFileSystem().get(), id, tmp, + /*should_sync=*/true, opts, file_opts); } if (s.ok()) { s = env->RenameFile(tmp, identify_file_name); diff --git a/file/filename.h b/file/filename.h index 56bbd78d555..5a52c745ac6 100644 --- a/file/filename.h +++ b/file/filename.h @@ -161,11 +161,12 @@ bool ParseFileName(const std::string& filename, uint64_t* number, // when IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, const std::string& dbname, uint64_t descriptor_number, + Temperature temp, FSDirectory* dir_contains_current_file); // Make the IDENTITY file for the db Status SetIdentityFile(const WriteOptions& write_options, Env* env, - const std::string& dbname, + const std::string& dbname, Temperature temp, const std::string& db_id = {}); // Sync manifest file `file`. diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 11f971c2426..3761923ce7e 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -813,7 +813,7 @@ struct AdvancedColumnFamilyOptions { // If this option is set, when creating the last level files, pass this // temperature to FileSystem used. Should be no-op for default FileSystem // and users need to plug in their own FileSystem to take advantage of it. - // When using FIFO compaction, this option is ignored. + // Currently only compatible with universal compaction. // // Dynamically changeable through the SetOptions() API Temperature last_level_temperature = Temperature::kUnknown; diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 8d21c919466..042b38305ca 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -195,7 +195,9 @@ struct FileOptions : EnvOptions { FileOptions() : EnvOptions(), handoff_checksum_type(ChecksumType::kCRC32c) {} FileOptions(const DBOptions& opts) - : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {} + : EnvOptions(opts), + temperature(opts.metadata_write_temperature), + handoff_checksum_type(ChecksumType::kCRC32c) {} FileOptions(const EnvOptions& opts) : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {} @@ -1952,7 +1954,8 @@ class FSDirectoryWrapper : public FSDirectory { // A utility routine: write "data" to the named file. IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, const std::string& fname, bool should_sync = false, - const IOOptions& io_options = IOOptions()); + const IOOptions& io_options = IOOptions(), + const FileOptions& file_options = FileOptions()); // A utility routine: read contents of named file into *data IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 8223c4a1338..4b25640836c 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1580,6 +1580,16 @@ struct DBOptions { // Default 100ms uint64_t follower_catchup_retry_wait_ms = 100; + // When DB files other than SST, blob and WAL files are created, use this + // filesystem temperature. (See also `wal_write_temperature` and various + // `*_temperature` CF options.) When not `kUnknown`, this overrides any + // temperature set by OptimizeForManifestWrite functions. + Temperature metadata_write_temperature = Temperature::kUnknown; + + // Use this filesystem temperature when creating WAL files. When not + // `kUnknown`, this overrides any temperature set by OptimizeForLogWrite + // functions. + Temperature wal_write_temperature = Temperature::kUnknown; // End EXPERIMENTAL }; diff --git a/options/db_options.cc b/options/db_options.cc index 8eb28c1edca..2678bb5a76d 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -576,6 +576,14 @@ static std::unordered_map {offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"metadata_write_temperature", + {offsetof(struct ImmutableDBOptions, metadata_write_temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"wal_write_temperature", + {offsetof(struct ImmutableDBOptions, wal_write_temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, }; const std::string OptionsHelper::kDBOptionsName = "DBOptions"; @@ -778,7 +786,9 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) follower_refresh_catchup_period_ms( options.follower_refresh_catchup_period_ms), follower_catchup_retry_count(options.follower_catchup_retry_count), - follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms) { + follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms), + metadata_write_temperature(options.metadata_write_temperature), + wal_write_temperature(options.wal_write_temperature) { fs = env->GetFileSystem(); clock = env->GetSystemClock().get(); logger = info_log.get(); @@ -956,6 +966,10 @@ void ImmutableDBOptions::Dump(Logger* log) const { db_host_id.c_str()); ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s", enforce_single_del_contracts ? "true" : "false"); + ROCKS_LOG_HEADER(log, " Options.metadata_write_temperature: %s", + temperature_to_string[metadata_write_temperature].c_str()); + ROCKS_LOG_HEADER(log, " Options.wal_write_temperature: %s", + temperature_to_string[wal_write_temperature].c_str()); } bool ImmutableDBOptions::IsWalDirSameAsDBPath() const { diff --git a/options/db_options.h b/options/db_options.h index 5de6ab498a6..7e075262699 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -103,6 +103,8 @@ struct ImmutableDBOptions { uint64_t follower_refresh_catchup_period_ms; uint64_t follower_catchup_retry_count; uint64_t follower_catchup_retry_wait_ms; + Temperature metadata_write_temperature; + Temperature wal_write_temperature; // Beginning convenience/helper objects that are not part of the base // DBOptions diff --git a/options/options_helper.cc b/options/options_helper.cc index ec62dd1f5c9..011f47b9843 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -180,6 +180,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.enforce_single_del_contracts = immutable_db_options.enforce_single_del_contracts; options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc; + options.follower_refresh_catchup_period_ms = + immutable_db_options.follower_refresh_catchup_period_ms; + options.follower_catchup_retry_count = + immutable_db_options.follower_catchup_retry_count; + options.follower_catchup_retry_wait_ms = + immutable_db_options.follower_catchup_retry_wait_ms; + options.metadata_write_temperature = + immutable_db_options.metadata_write_temperature; + options.wal_write_temperature = immutable_db_options.wal_write_temperature; return options; } diff --git a/options/options_parser.cc b/options/options_parser.cc index ec32f764472..4e249908be1 100644 --- a/options/options_parser.cc +++ b/options/options_parser.cc @@ -69,8 +69,9 @@ Status PersistRocksDBOptions(const WriteOptions& write_options, } std::unique_ptr wf; - Status s = - fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr); + FileOptions file_options; + file_options.temperature = db_opt.metadata_write_temperature; + Status s = fs->NewWritableFile(file_name, file_options, &wf, nullptr); if (!s.ok()) { return s; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 2bf349b1cbb..67aab055e18 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -367,7 +367,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "lowest_used_cache_tier=kNonVolatileBlockTier;" "allow_data_in_errors=false;" "enforce_single_del_contracts=false;" - "daily_offpeak_time_utc=08:30-19:00;", + "daily_offpeak_time_utc=08:30-19:00;" + "follower_refresh_catchup_period_ms=123;" + "follower_catchup_retry_count=456;" + "follower_catchup_retry_wait_ms=789;" + "metadata_write_temperature=kCold;" + "wal_write_temperature=kHot;", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),