From 776bd79381b592178da85db960b576bd280c4b5a Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Sun, 28 Jul 2024 22:40:51 +0300 Subject: [PATCH] fix: reenable macos builds (#3399) * fix: reenable macos builds Also, add debug function that prints local state if deadlocks occur. * fix: free cold memory for non-cache mode as well * chore: disable FreeMemWithEvictionStep again Because it heavily affects the performance when performing evictions. --------- Signed-off-by: Roman Gershman --- .github/workflows/daily-builds.yml | 6 ++---- helio | 2 +- src/server/db_slice.cc | 25 +++++++++++++++++++------ src/server/tiered_storage.h | 20 ++++++++++++++++++-- src/server/tiered_storage_test.cc | 5 ++++- src/server/transaction.cc | 7 +++++++ 6 files changed, 51 insertions(+), 14 deletions(-) diff --git a/.github/workflows/daily-builds.yml b/.github/workflows/daily-builds.yml index a2f3b2178537..9970d394b783 100644 --- a/.github/workflows/daily-builds.yml +++ b/.github/workflows/daily-builds.yml @@ -68,7 +68,6 @@ jobs: ctest -V -L DFLY build-macos: - if: false runs-on: macos-13 timeout-minutes: 45 steps: @@ -94,8 +93,7 @@ jobs: rm /usr/local/bin/idle3* || : rm /usr/local/bin/pydoc* || : rm /usr/local/bin/python3* || : - brew update && brew install ninja boost openssl automake gcc zstd bison c-ares \ - autoconf libtool automake + brew update && brew install ninja boost automake zstd bison mkdir -p $GITHUB_WORKSPACE/build @@ -110,7 +108,7 @@ jobs: echo "*************************** START BUILDING **************************************" CC=gcc-12 CXX=g++-12 cmake .. 
-DCMAKE_BUILD_TYPE=Debug -GNinja -DWITH_UNWIND=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=sccache + -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_FLAGS="-Wl,-ld_classic" ninja src/all diff --git a/helio b/helio index 288cb312971f..fb46b481e3eb 160000 --- a/helio +++ b/helio @@ -1 +1 @@ -Subproject commit 288cb312971f07cd35fa3a75fcfe047788d7d0fb +Subproject commit fb46b481e3eb82ecbc7bbf1d22b2fda7f5fac409 diff --git a/src/server/db_slice.cc b/src/server/db_slice.cc index a447fe880919..cb04c6e2d92c 100644 --- a/src/server/db_slice.cc +++ b/src/server/db_slice.cc @@ -8,11 +8,11 @@ #include "base/flags.h" #include "base/logging.h" -#include "generic_family.h" #include "server/channel_store.h" #include "server/cluster/cluster_defs.h" #include "server/engine_shard_set.h" #include "server/error.h" +#include "server/generic_family.h" #include "server/journal/journal.h" #include "server/server_state.h" #include "server/tiered_storage.h" @@ -40,7 +40,7 @@ namespace dfly { using namespace std; using namespace util; using absl::GetFlag; -using facade::OpStatus; +using namespace facade; using Payload = journal::Entry::Payload; namespace { @@ -574,6 +574,14 @@ OpResult DbSlice::AddOrFindInternal(const Context& cnt // It's a new entry. CallChangeCallbacks(cntx.db_index, key, {key}); + // If we are low on memory due to cold storage, free some memory. + if (memory_budget_ < ssize_t(key.size()) && owner_->tiered_storage()) { + // At least 40KB bytes to cover potential segment split. + size_t goal = std::max(key.size() * 2 - memory_budget_, 40_KB); + size_t reclaimed = owner_->tiered_storage()->ReclaimMemory(goal); + memory_budget_ += reclaimed; + } + // In case we are loading from rdb file or replicating we want to disable conservative memory // checks (inside PrimeEvictionPolicy::CanGrow) and reject insertions only after we pass max // memory limit. 
When loading a snapshot created by the same server configuration (memory and @@ -621,11 +629,15 @@ OpResult DbSlice::AddOrFindInternal(const Context& cnt // and we add new objects or update the existing ones and our memory usage grows. // We do not require for a single operation to unload the whole negative debt. // Instead, we create a positive, converging force that should help with freeing enough memory. - // Free at least 256 bytes or 3% of the total debt. - size_t evict_goal = std::max(256, (-evp.mem_budget()) / 32); + // Free at least K bytes or 3% of the total debt. + // TODO: to reenable and optimize this - this call significantly slows down server + // when evictions are running. +#if 0 + size_t evict_goal = std::max(512, (-evp.mem_budget()) / 32); auto [items, bytes] = FreeMemWithEvictionStep(cntx.db_index, it.segment_id(), evict_goal); evicted_obj_bytes = bytes; events_.hard_evictions += items; +#endif } table_memory_ += (db.table_memory() - table_before); @@ -1233,8 +1245,6 @@ int32_t DbSlice::GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) c pair DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t starting_segment_id, size_t increase_goal_bytes) { DCHECK(!owner_->IsReplica()); - if ((!caching_mode_) || !expire_allowed_) - return {0, 0}; size_t evicted_items = 0, evicted_bytes = 0; @@ -1244,6 +1254,9 @@ pair DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t s return {0, evicted_bytes}; } + if ((!caching_mode_) || !expire_allowed_) + return {0, 0}; + auto max_eviction_per_hb = GetFlag(FLAGS_max_eviction_per_heartbeat); auto max_segment_to_consider = GetFlag(FLAGS_max_segment_to_consider); diff --git a/src/server/tiered_storage.h b/src/server/tiered_storage.h index 3089fdb10c69..c140ab5b8ab5 100644 --- a/src/server/tiered_storage.h +++ b/src/server/tiered_storage.h @@ -133,7 +133,7 @@ class TieredStorage { // Min sizes of values taking up full page on their own const static size_t kMinOccupancySize = tiering::kPageSize / 2; - 
explicit TieredStorage(DbSlice* db_slice, size_t max_size) { + explicit TieredStorage(size_t max_size, DbSlice* db_slice) { } TieredStorage(TieredStorage&& other) = delete; @@ -150,6 +150,10 @@ class TieredStorage { return {}; } + void Read(DbIndex dbid, std::string_view key, const PrimeValue& value, + std::function readf) { + } + template util::fb2::Future Modify(DbIndex dbid, std::string_view key, const PrimeValue& value, std::function modf) { @@ -159,7 +163,19 @@ class TieredStorage { void TryStash(DbIndex dbid, std::string_view key, PrimeValue* value) { } - void Delete(PrimeValue* value) { + void Delete(DbIndex dbid, PrimeValue* value) { + } + + size_t ReclaimMemory(size_t goal) { + return 0; + } + + float WriteDepthUsage() const { + return 0; + } + + size_t CoolMemoryUsage() const { + return 0; } void CancelStash(DbIndex dbid, std::string_view key, PrimeValue* value) { diff --git a/src/server/tiered_storage_test.cc b/src/server/tiered_storage_test.cc index 10c377da4263..cf71d4044c50 100644 --- a/src/server/tiered_storage_test.cc +++ b/src/server/tiered_storage_test.cc @@ -310,7 +310,10 @@ TEST_F(TieredStorageTest, MemoryPressure) { constexpr size_t kNum = 10000; for (size_t i = 0; i < kNum; i++) { auto resp = Run({"SET", absl::StrCat("k", i), BuildString(10000)}); - ASSERT_EQ(resp, "OK") << i; + if (resp != "OK"sv) { + resp = Run({"INFO", "ALL"}); + ASSERT_FALSE(true) << i << "\nInfo ALL:\n" << resp.GetString(); + } // TODO: to remove it once used_mem_current is updated frequently. ThisFiber::SleepFor(10us); } diff --git a/src/server/transaction.cc b/src/server/transaction.cc index 2793dd1621b9..b4531755a29c 100644 --- a/src/server/transaction.cc +++ b/src/server/transaction.cc @@ -718,6 +718,13 @@ void Transaction::ScheduleInternal() { run_barrier_.Dec(); } else { IterateActiveShards([cb](const auto& sd, ShardId i) { shard_set->Add(i, cb); }); + + // Add this debugging function to print more information when we experience deadlock + // during tests. 
+ ThisFiber::PrintLocalsCallback locals([&] { + return absl::StrCat("unique_shard_cnt_: ", unique_shard_cnt_, + " run_barrier_cnt: ", run_barrier_.DEBUG_Count(), "\n"); + }); run_barrier_.Wait(); }