From 63ed8688408b59dab86876928efa97f36dc0c3d1 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 29 Sep 2023 13:03:39 -0700 Subject: [PATCH] Offpeak in db option (#11893) Summary: RocksDB's primary function is to facilitate read and write operations. Compactions, while essential for minimizing read amplification and optimizing storage, can sometimes compete with these primary tasks. Especially during periods of high read/write traffic, it's vital to ensure that primary operations receive priority, avoiding any potential disruptions or slowdowns. Conversely, off-peak times, when traffic is minimal, are an opportune moment to tackle low-priority tasks like TTL-based compactions, optimizing resource usage. In this PR, we are incorporating the concept of off-peak time into RocksDB by introducing `daily_offpeak_time_utc` within the DBOptions. This setting is formatted as "HH:mm-HH:mm", where the time before the "-" is the start time and the time after it is the end time, inclusive. It will later be used for resource optimization in subsequent PRs. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11893 Test Plan: - New Unit Test Added - `DBOptionsTest::OffPeakTimes` - Existing Unit Test Updated - `OptionsTest`, `OptionsSettableTest` Reviewed By: pdillinger Differential Revision: D49714553 Pulled By: jaykorean fbshipit-source-id: fef51ea7c0fede6431c715bff116ddbb567c8752 --- db/db_impl/db_impl.h | 4 + db/db_impl/db_impl_open.cc | 10 ++ db/db_options_test.cc | 154 ++++++++++++++++++ include/rocksdb/options.h | 19 +++ options/db_options.cc | 36 +++- options/db_options.h | 3 + options/options_helper.cc | 1 + options/options_settable_test.cc | 4 +- options/options_test.cc | 7 +- .../new_features/offpeak_db_option.md | 1 + util/string_util.cc | 39 +++++ util/string_util.h | 10 ++ 12 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 unreleased_history/new_features/offpeak_db_option.md diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 3f114fa3691..8ae0b9ef060 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1197,6 +1197,10 @@ class DBImpl : public DB { const PeriodicTaskScheduler& TEST_GetPeriodicTaskScheduler() const; + static Status TEST_ValidateOptions(const DBOptions& db_options) { + return ValidateOptions(db_options); + } + #endif // NDEBUG // persist stats to column family "_persistent_stats" diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 172a55879e0..b89b997e783 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -25,6 +25,7 @@ #include "rocksdb/wal_filter.h" #include "test_util/sync_point.h" #include "util/rate_limiter_impl.h" +#include "util/string_util.h" #include "util/udt_util.h" namespace ROCKSDB_NAMESPACE { @@ -291,6 +292,15 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { "writes in direct IO require writable_file_max_buffer_size > 0"); } + if (db_options.daily_offpeak_time_utc != "") { + int start_time, end_time; + if 
(!TryParseTimeRangeString(db_options.daily_offpeak_time_utc, start_time, + end_time)) { + return Status::InvalidArgument( + "daily_offpeak_time_utc should be set in the format HH:mm-HH:mm " + "(e.g. 04:30-07:30)"); + } + } return Status::OK(); } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index c3910a9787b..19d6b7e2f2b 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -19,6 +19,7 @@ #include "rocksdb/convenience.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/stats_history.h" +#include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/random.h" @@ -1033,6 +1034,159 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_EQ(fifo_temp_opt[1].age, 30000); } +TEST_F(DBOptionsTest, OffPeakTimes) { + Options options; + options.create_if_missing = true; + + auto verify_invalid = [&]() { + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_NOK(s); + ASSERT_TRUE(s.IsInvalidArgument()); + }; + + auto verify_valid = [&]() { + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_OK(s); + ASSERT_FALSE(s.IsInvalidArgument()); + }; + std::vector invalid_cases = { + "06:30-", + "-23:30", // Both need to be set + "12:30 PM-23:30", + "12:01AM-11:00PM", // Invalid format + "01:99-22:00", // Invalid value for minutes + "00:00-24:00", // 24:00 is an invalid value + "6-7", + "6:-7", + "06:31.42-7:00", + "6.31:42-7:00", + "6:0-7:", + "15:0.2-3:.7", + ":00-00:02", + "02:00-:00", + "random-value", + "No:No-Hi:Hi", + }; + + std::vector valid_cases = { + "", // Not enabled. Valid case + "00:00-00:00", // Valid. Entire 24 hours are offpeak. + "06:30-11:30", "06:30-23:30", "13:30-14:30", + "00:00-23:59", // This doesn't cover entire 24 hours. There's 1 minute + // gap from 11:59:00PM to midnight + "23:30-01:15", // From 11:30PM to 1:15AM next day. Valid case. + "1:0000000000000-2:000000000042", // Weird, but we can parse the int. 
+ }; + + for (std::string invalid_case : invalid_cases) { + options.daily_offpeak_time_utc = invalid_case; + verify_invalid(); + } + for (std::string valid_case : valid_cases) { + options.daily_offpeak_time_utc = valid_case; + verify_valid(); + } + + auto verify_is_now_offpeak = [&](bool expected, int now_utc_hour, + int now_utc_minute, int now_utc_second = 0) { + auto mock_clock = std::make_shared(env_->GetSystemClock()); + // Add some extra random days to current time + Random rnd(301); + int days = rnd.Uniform(100); + mock_clock->SetCurrentTime(days * 86400 + now_utc_hour * 3600 + + now_utc_minute * 60 + now_utc_second); + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_OK(s); + auto db_options = MutableDBOptions(options); + ASSERT_EQ(expected, db_options.IsNowOffPeak(mock_clock.get())); + }; + + options.daily_offpeak_time_utc = ""; + verify_is_now_offpeak(false, 12, 30); + + options.daily_offpeak_time_utc = "06:30-11:30"; + verify_is_now_offpeak(false, 5, 30); + verify_is_now_offpeak(true, 6, 30); + verify_is_now_offpeak(true, 10, 30); + verify_is_now_offpeak(true, 11, 30); + verify_is_now_offpeak(false, 13, 30); + + options.daily_offpeak_time_utc = "23:30-04:30"; + verify_is_now_offpeak(false, 6, 30); + verify_is_now_offpeak(true, 23, 30); + verify_is_now_offpeak(true, 0, 0); + verify_is_now_offpeak(true, 1, 0); + verify_is_now_offpeak(true, 4, 30); + verify_is_now_offpeak(false, 4, 31); + + // There's one minute gap from 11:59PM to midnight + options.daily_offpeak_time_utc = "00:00-23:59"; + verify_is_now_offpeak(true, 0, 0); + verify_is_now_offpeak(true, 12, 00); + verify_is_now_offpeak(true, 23, 59); + verify_is_now_offpeak(false, 23, 59, 1); + + // Open the db and test by Get/SetDBOptions + options.daily_offpeak_time_utc = ""; + DestroyAndReopen(options); + ASSERT_EQ("", dbfull()->GetDBOptions().daily_offpeak_time_utc); + for (std::string invalid_case : invalid_cases) { + ASSERT_NOK( + dbfull()->SetDBOptions({{"daily_offpeak_time_utc", 
invalid_case}})); + } + for (std::string valid_case : valid_cases) { + ASSERT_OK(dbfull()->SetDBOptions({{"daily_offpeak_time_utc", valid_case}})); + ASSERT_EQ(valid_case, dbfull()->GetDBOptions().daily_offpeak_time_utc); + } + Close(); + + // Sets off-peak time from 11:30PM to 4:30AM next day. + // Starting at 1:30PM, use mock sleep to make time pass + // and see if IsNowOffPeak() returns correctly per time changes + int now_hour = 13; + int now_minute = 30; + options.daily_offpeak_time_utc = "23:30-04:30"; + auto mock_clock = std::make_shared(env_->GetSystemClock()); + auto mock_env = std::make_unique(env_, mock_clock); + // Add some extra random days to current time + Random rnd(301); + int days = rnd.Uniform(100); + mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); + options.env = mock_env.get(); + + // Starting at 1:30PM. It's not off-peak + DestroyAndReopen(options); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 4:30PM. Still not off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 11:30PM. It's off-peak + mock_clock->MockSleepForSeconds(7 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 2:30AM next day. It's still off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 4:30AM. It's still off-peak + mock_clock->MockSleepForSeconds(2 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Sleep for one more second. 
It's no longer off-peak + mock_clock->MockSleepForSeconds(1); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + Close(); +} + TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { for (bool use_direct_reads : {true, false}) { SpecialEnv env(env_); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ac70def2421..b14ecfb77b5 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1427,6 +1427,25 @@ struct DBOptions { // of the contract leads to undefined behaviors with high possibility of data // inconsistency, e.g. deleted old data become visible again, etc. bool enforce_single_del_contracts = true; + + // EXPERIMENTAL + // Implementing offpeak duration awareness in RocksDB. In this context, "peak + // time" signifies periods characterized by significantly elevated read and + // write activity compared to other times. By leveraging this knowledge, we + // can prevent low-priority tasks, such as TTL-based compactions, from + // competing with read and write operations during peak hours. Essentially, we + // preprocess these tasks during the preceding off-peak period, just before + // the next peak cycle begins. For example, if the TTL is configured for 25 + // days, we may compact the files during the off-peak hours of the 24th day. + // + // Time of the day in UTC. Format - HH:mm-HH:mm (00:00-23:59) + // If the start time > end time, it will be considered that the time period + // spans to the next day (e.g., 23:30-04:00) + // If the start time == end time, entire 24 hours will be considered offpeak + // (e.g. 00:00-00:00). Note that 00:00-23:59 will have one minute gap from + // 11:59:00PM to midnight. 
+ // Default: Empty String (No notion of peak/offpeak) + std::string daily_offpeak_time_utc = ""; }; // Options to control the behavior of a database (passed to DB::Open) diff --git a/options/db_options.cc b/options/db_options.cc index b93e35f43d9..af30718f311 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -129,6 +129,10 @@ static std::unordered_map {offsetof(struct MutableDBOptions, max_background_flushes), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"daily_offpeak_time_utc", + {offsetof(struct MutableDBOptions, daily_offpeak_time_utc), + OptionType::kString, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, }; static std::unordered_map @@ -991,7 +995,8 @@ MutableDBOptions::MutableDBOptions() wal_bytes_per_sync(0), strict_bytes_per_sync(false), compaction_readahead_size(0), - max_background_flushes(-1) {} + max_background_flushes(-1), + daily_offpeak_time_utc("") {} MutableDBOptions::MutableDBOptions(const DBOptions& options) : max_background_jobs(options.max_background_jobs), @@ -1011,7 +1016,8 @@ MutableDBOptions::MutableDBOptions(const DBOptions& options) wal_bytes_per_sync(options.wal_bytes_per_sync), strict_bytes_per_sync(options.strict_bytes_per_sync), compaction_readahead_size(options.compaction_readahead_size), - max_background_flushes(options.max_background_flushes) {} + max_background_flushes(options.max_background_flushes), + daily_offpeak_time_utc(options.daily_offpeak_time_utc) {} void MutableDBOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d", @@ -1056,6 +1062,32 @@ void MutableDBOptions::Dump(Logger* log) const { compaction_readahead_size); ROCKS_LOG_HEADER(log, " Options.max_background_flushes: %d", max_background_flushes); + ROCKS_LOG_HEADER(log, "Options.daily_offpeak_time_utc: %s", + daily_offpeak_time_utc.c_str()); +} + +bool MutableDBOptions::IsNowOffPeak(SystemClock* clock) const { + if (daily_offpeak_time_utc.empty()) { + return 
false; + } + int64_t now = 0; + int start_time = 0, end_time = 0; + // Parse with a real call, not inside assert(): NDEBUG builds never evaluate + // the asserted expression, which would leave both bounds at 0 in release. + if (!clock->GetCurrentTime(&now).ok() || + !TryParseTimeRangeString(daily_offpeak_time_utc, start_time, + end_time)) { + return false; + } + constexpr int kSecondsPerDay = 86400; + int since_midnight_seconds = static_cast<int>(now % kSecondsPerDay); + // Wraps past midnight (e.g. 23:30-04:30); start == end is the whole day + if (start_time >= end_time) { + return start_time <= since_midnight_seconds || + since_midnight_seconds <= end_time; + } + return start_time <= since_midnight_seconds && + since_midnight_seconds <= end_time; } Status GetMutableDBOptionsFromStrings( diff --git a/options/db_options.h b/options/db_options.h index d00a0671845..85a4d949b97 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -136,6 +136,9 @@ struct MutableDBOptions { bool strict_bytes_per_sync; size_t compaction_readahead_size; int max_background_flushes; + + std::string daily_offpeak_time_utc; + bool IsNowOffPeak(SystemClock* clock) const; }; Status GetStringFromMutableDBOptions(const ConfigOptions& config_options, diff --git a/options/options_helper.cc b/options/options_helper.cc index 8d32640c924..0c76d031581 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -179,6 +179,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.lowest_used_cache_tier = immutable_db_options.lowest_used_cache_tier; options.enforce_single_del_contracts = immutable_db_options.enforce_single_del_contracts; + options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc; return options; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 39d7d6b203d..2f7493f32eb 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -252,6 +252,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { sizeof(FileTypeSet)}, {offsetof(struct DBOptions, compaction_service), sizeof(std::shared_ptr)}, + 
{offsetof(struct DBOptions, daily_offpeak_time_utc), sizeof(std::string)}, }; char* options_ptr = new char[sizeof(DBOptions)]; @@ -365,7 +366,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "db_host_id=hostname;" "lowest_used_cache_tier=kNonVolatileBlockTier;" "allow_data_in_errors=false;" - "enforce_single_del_contracts=false;", + "enforce_single_del_contracts=false;" + "daily_offpeak_time_utc=08:30-19:00;", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), diff --git a/options/options_test.cc b/options/options_test.cc index 855243c955f..a05ed0c8c80 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -178,6 +178,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"wal_bytes_per_sync", "48"}, {"strict_bytes_per_sync", "true"}, {"preserve_deletes", "false"}, + {"daily_offpeak_time_utc", ""}, }; ColumnFamilyOptions base_cf_opt; @@ -358,6 +359,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast(47)); ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast(48)); ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); + ASSERT_EQ(new_db_opt.daily_offpeak_time_utc, ""); db_options_map["max_open_files"] = "hello"; Status s = @@ -879,6 +881,7 @@ TEST_F(OptionsTest, OldInterfaceTest) { {"track_and_verify_wals_in_manifest", "true"}, {"verify_sst_unique_id_in_manifest", "true"}, {"max_open_files", "32"}, + {"daily_offpeak_time_utc", "06:30-23:30"}, }; ConfigOptions db_config_options(base_db_opt); @@ -909,11 +912,13 @@ TEST_F(OptionsTest, OldInterfaceTest) { db_config_options.ignore_unknown_options = false; ASSERT_OK(GetDBOptionsFromString( db_config_options, base_db_opt, - "create_if_missing=false;error_if_exists=false;max_open_files=42;", + "create_if_missing=false;error_if_exists=false;max_open_files=42;" + "daily_offpeak_time_utc=08:30-19:00;", &new_db_opt)); ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.error_if_exists, false); 
ASSERT_EQ(new_db_opt.max_open_files, 42); + ASSERT_EQ(new_db_opt.daily_offpeak_time_utc, "08:30-19:00"); s = GetDBOptionsFromString( db_config_options, base_db_opt, "create_if_missing=false;error_if_exists=false;max_open_files=42;" diff --git a/unreleased_history/new_features/offpeak_db_option.md b/unreleased_history/new_features/offpeak_db_option.md new file mode 100644 index 00000000000..bdc9b2a2939 --- /dev/null +++ b/unreleased_history/new_features/offpeak_db_option.md @@ -0,0 +1 @@ +Add an experimental offpeak duration awareness by setting `DBOptions::daily_offpeak_time_utc` in "HH:mm-HH:mm" format. This information will be used for resource optimization in the future diff --git a/util/string_util.cc b/util/string_util.cc index 821ccba07f4..57207889f1a 100644 --- a/util/string_util.cc +++ b/util/string_util.cc @@ -437,6 +437,45 @@ bool SerializeIntVector(const std::vector& vec, std::string* value) { return true; } +int ParseTimeStringToSeconds(const std::string& value) { + int hours, minutes; + char colon; + + std::istringstream stream(value); + stream >> hours >> colon >> minutes; + + if (stream.fail() || !stream.eof() || colon != ':') { + return -1; + } + + if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59) { + return -1; + } + return hours * 3600 + minutes * 60; +} + +bool TryParseTimeRangeString(const std::string& value, int& start_time, + int& end_time) { + if (value.empty()) { + start_time = 0; + end_time = 0; + return true; + } + auto split = StringSplit(value, '-'); + if (split.size() != 2) { + return false; + } + start_time = ParseTimeStringToSeconds(split[0]); + if (start_time < 0) { + return false; + } + end_time = ParseTimeStringToSeconds(split[1]); + if (end_time < 0) { + return false; + } + return true; +} + // Copied from folly/string.cpp: // https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457 // There are two variants of `strerror_r` function, one returns diff --git a/util/string_util.h 
b/util/string_util.h index 0b15181f5df..999081ebba9 100644 --- a/util/string_util.h +++ b/util/string_util.h @@ -166,6 +166,16 @@ std::vector ParseVectorInt(const std::string& value); bool SerializeIntVector(const std::vector& vec, std::string* value); +// Expects HH:mm format for the input value (hours 0-23, minutes 0-59) +// Returns -1 if invalid input. Otherwise returns seconds since midnight +int ParseTimeStringToSeconds(const std::string& value); + +// Expects HH:mm-HH:mm format for the input value ("" sets both times to 0) +// Returns false if the format is invalid (outputs may be partially written). +// Otherwise returns true with start_time/end_time in seconds since midnight +bool TryParseTimeRangeString(const std::string& value, int& start_time, + int& end_time); + extern const std::string kNullptrString; // errnoStr() function returns a string that describes the error code passed in