From 15f9317ab76587edd41ece59f31ca992a2ba72bd Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 21 Nov 2019 10:06:27 +0800 Subject: [PATCH 01/10] tcmalloc: add metric for test --- src/dist/replication/lib/replica_stub.cpp | 63 +++++++++++++++++++---- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index c6cc7bf3a1..5a2ff8abef 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -620,6 +620,17 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f } } +#ifdef DSN_ENABLE_GPERF +static int64_t get_tcmalloc_property(const char *prop) +{ + size_t value = 0; + if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) { + dfatal_f("Failed to get tcmalloc property {}", prop); + } + return value; +} +#endif + void replica_stub::initialize_start() { // start timer for configuration sync @@ -638,17 +649,47 @@ void replica_stub::initialize_start() #ifdef DSN_ENABLE_GPERF if (_options.mem_release_enabled) { - _mem_release_timer_task = - tasking::enqueue_timer(LPC_MEM_RELEASE, - &_tracker, - []() { - ddebug("Memory release has started..."); - ::MallocExtension::instance()->ReleaseFreeMemory(); - ddebug("Memory release has ended..."); - }, - std::chrono::milliseconds(_options.mem_release_interval_ms), - 0, - std::chrono::milliseconds(_options.mem_release_interval_ms)); + _mem_release_timer_task = tasking::enqueue_timer( + LPC_MEM_RELEASE, + &_tracker, + []() { + ddebug("Memory release has started..."); + int64_t current_allocated_bytes = + get_tcmalloc_property("generic.current_allocated_bytes"); + int64_t pageheap_free_bytes = get_tcmalloc_property("tcmalloc.pageheap_free_bytes"); + int64_t current_total_thread_cache_bytes = + get_tcmalloc_property("tcmalloc.current_total_thread_cache_bytes"); + int64_t thread_cache_free_bytes = + 
get_tcmalloc_property("tcmalloc.thread_cache_free_bytes"); + int64_t central_cache_free_bytes = + get_tcmalloc_property("tcmalloc.central_cache_free_bytes"); + int64_t transfer_cache_free_bytes = + get_tcmalloc_property("tcmalloc.transfer_cache_free_bytes"); + int64_t max_overhead = current_allocated_bytes * 10 / 100; + bool need_release = pageheap_free_bytes > max_overhead; + + ddebug_f("hyc tcmalloc statistic:"); + ddebug_f("hyc total_allocated={}M, pageheap_free={}M, need_release={}", + current_allocated_bytes / 1000000, + pageheap_free_bytes / 1000000, + need_release); + ddebug_f( + "hyc total_thread_cache={}M, thread_cache_free={}M, central_cache_free={}M, " + "transfer_free={}M", + current_total_thread_cache_bytes / 1000000, + thread_cache_free_bytes / 1000000, + central_cache_free_bytes / 1000000, + transfer_cache_free_bytes / 1000000); + ::MallocExtension::instance()->ReleaseFreeMemory(); + ddebug("Memory release has ended..."); + int64_t after_release_total_allocated_bytes = + get_tcmalloc_property("generic.current_allocated_bytes"); + ddebug_f("hyc release memory {}M", + (current_allocated_bytes - after_release_total_allocated_bytes) / 1000000); + }, + std::chrono::milliseconds(_options.mem_release_interval_ms), + 0, + std::chrono::milliseconds(_options.mem_release_interval_ms)); } #endif From 8ee23ddb9b930e6297e83c905929d057ee9ed126 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 21 Nov 2019 15:24:16 +0800 Subject: [PATCH 02/10] tcmalloc: fix log --- src/dist/replication/lib/replica_stub.cpp | 32 ++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 5a2ff8abef..3d0970b808 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -626,6 +626,7 @@ static int64_t get_tcmalloc_property(const char *prop) size_t value = 0; if (!::MallocExtension::instance()->GetNumericProperty(prop, 
&value)) { dfatal_f("Failed to get tcmalloc property {}", prop); + assert(false); } return value; } @@ -668,24 +669,31 @@ void replica_stub::initialize_start() int64_t max_overhead = current_allocated_bytes * 10 / 100; bool need_release = pageheap_free_bytes > max_overhead; - ddebug_f("hyc tcmalloc statistic:"); - ddebug_f("hyc total_allocated={}M, pageheap_free={}M, need_release={}", - current_allocated_bytes / 1000000, - pageheap_free_bytes / 1000000, + ddebug_f("Memory total_allocated={}, pageheap_free={}, need_release={}", + current_allocated_bytes, + pageheap_free_bytes, need_release); ddebug_f( - "hyc total_thread_cache={}M, thread_cache_free={}M, central_cache_free={}M, " - "transfer_free={}M", - current_total_thread_cache_bytes / 1000000, - thread_cache_free_bytes / 1000000, - central_cache_free_bytes / 1000000, - transfer_cache_free_bytes / 1000000); + "Memory total_thread_cache={}, thread_cache_free={}, central_cache_free={}, " + "transfer_free={}", + current_total_thread_cache_bytes, + thread_cache_free_bytes, + central_cache_free_bytes, + transfer_cache_free_bytes); ::MallocExtension::instance()->ReleaseFreeMemory(); ddebug("Memory release has ended..."); int64_t after_release_total_allocated_bytes = get_tcmalloc_property("generic.current_allocated_bytes"); - ddebug_f("hyc release memory {}M", - (current_allocated_bytes - after_release_total_allocated_bytes) / 1000000); + int64_t after_release_pageheap_free_bytes = + get_tcmalloc_property("tcmalloc.pageheap_free_bytes"); + ddebug_f("Memory before memory={}, after memory={}, total_release={}, before " + "heap_page_free={}, after heap_page_free={}, heap_page_free_gap={}", + current_allocated_bytes, + after_release_total_allocated_bytes, + (after_release_total_allocated_bytes - current_allocated_bytes), + pageheap_free_bytes, + after_release_pageheap_free_bytes, + (pageheap_free_bytes - after_release_pageheap_free_bytes)); }, std::chrono::milliseconds(_options.mem_release_interval_ms), 0, From 
60f43ae6d50c573051414d89765c2f5a48b95ea1 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 21 Nov 2019 17:48:11 +0800 Subject: [PATCH 03/10] tcmalloc test --- src/dist/replication/lib/replica_stub.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 3d0970b808..e7f4ebe3da 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -666,13 +666,12 @@ void replica_stub::initialize_start() get_tcmalloc_property("tcmalloc.central_cache_free_bytes"); int64_t transfer_cache_free_bytes = get_tcmalloc_property("tcmalloc.transfer_cache_free_bytes"); - int64_t max_overhead = current_allocated_bytes * 10 / 100; + int64_t max_overhead = current_allocated_bytes * 5 / 100; bool need_release = pageheap_free_bytes > max_overhead; - ddebug_f("Memory total_allocated={}, pageheap_free={}, need_release={}", + ddebug_f("Memory total_allocated={}, pageheap_free={}", current_allocated_bytes, - pageheap_free_bytes, - need_release); + pageheap_free_bytes); ddebug_f( "Memory total_thread_cache={}, thread_cache_free={}, central_cache_free={}, " "transfer_free={}", @@ -680,6 +679,7 @@ void replica_stub::initialize_start() thread_cache_free_bytes, central_cache_free_bytes, transfer_cache_free_bytes); + ddebug_f("Memory need_release={}", need_release); ::MallocExtension::instance()->ReleaseFreeMemory(); ddebug("Memory release has ended..."); int64_t after_release_total_allocated_bytes = From ca2d363a8ff0c6aba6e8a5d8bd6eb4c1104eb500 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Fri, 22 Nov 2019 15:00:22 +0800 Subject: [PATCH 04/10] tcmalloc: update release method --- src/dist/replication/lib/replica_stub.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index e7f4ebe3da..50be9bfaeb 100644 --- 
a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -680,7 +680,18 @@ void replica_stub::initialize_start() central_cache_free_bytes, transfer_cache_free_bytes); ddebug_f("Memory need_release={}", need_release); - ::MallocExtension::instance()->ReleaseFreeMemory(); + + // ::MallocExtension::instance()->ReleaseFreeMemory(); + + int64_t bytes_overhead = pageheap_free_bytes; + if (bytes_overhead > max_overhead) { + int64_t extra = bytes_overhead - max_overhead; + while (extra > 0) { + ::MallocExtension::instance()->ReleaseToSystem(1024 * 1024); + extra -= 1024 * 1024; + } + } + ddebug("Memory release has ended..."); int64_t after_release_total_allocated_bytes = get_tcmalloc_property("generic.current_allocated_bytes"); From ecfb83ef4139933d660ebd87986fea478fbc6169 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Mon, 25 Nov 2019 17:04:30 +0800 Subject: [PATCH 05/10] tcmalloc: refactor code --- .../replication/common/replication_common.cpp | 18 ++- .../replication/common/replication_common.h | 3 +- src/dist/replication/lib/replica_stub.cpp | 112 +++++++----------- src/dist/replication/lib/replica_stub.h | 10 ++ 4 files changed, 70 insertions(+), 73 deletions(-) diff --git a/src/dist/replication/common/replication_common.cpp b/src/dist/replication/common/replication_common.cpp index 6e5c46685b..d1d891ea26 100644 --- a/src/dist/replication/common/replication_common.cpp +++ b/src/dist/replication/common/replication_common.cpp @@ -100,7 +100,8 @@ replication_options::replication_options() config_sync_interval_ms = 30000; mem_release_enabled = true; - mem_release_interval_ms = 86400000; + mem_release_check_interval_ms = 86400000; + mem_release_tcmalloc_max_reserved_memory_percentage = 10; lb_interval_ms = 10000; @@ -479,11 +480,18 @@ void replication_options::initialize() mem_release_enabled, "whether to enable periodic memory release"); - mem_release_interval_ms = (int)dsn_config_get_value_uint64( + mem_release_check_interval_ms = 
(int)dsn_config_get_value_uint64( "replication", - "mem_release_interval_ms", - mem_release_interval_ms, - "the replica releases its idle memory to the system every this period of time(ms)"); + "mem_release_check_interval_ms", + mem_release_check_interval_ms, + "the replica check if should release memory to the system every this period of time(ms)"); + + mem_release_tcmalloc_max_reserved_memory_percentage = (int)dsn_config_get_value_uint64( + "replication", + "mem_release_tcmalloc_max_reserved_memory_percentage", + mem_release_tcmalloc_max_reserved_memory_percentage, + "if tcmalloc reserved but not-used memory exceed this percentage of application allocated " + "memory, replica server will release the exceeding memory back to operating system"); lb_interval_ms = (int)dsn_config_get_value_uint64( "replication", diff --git a/src/dist/replication/common/replication_common.h b/src/dist/replication/common/replication_common.h index 1630dcdad2..bd81f4c2ed 100644 --- a/src/dist/replication/common/replication_common.h +++ b/src/dist/replication/common/replication_common.h @@ -104,7 +104,8 @@ class replication_options int32_t config_sync_interval_ms; bool mem_release_enabled; - int32_t mem_release_interval_ms; + int32_t mem_release_check_interval_ms; + int32_t mem_release_tcmalloc_max_reserved_memory_percentage; int32_t lb_interval_ms; diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 50be9bfaeb..2079abcc6b 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -620,18 +620,6 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f } } -#ifdef DSN_ENABLE_GPERF -static int64_t get_tcmalloc_property(const char *prop) -{ - size_t value = 0; - if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) { - dfatal_f("Failed to get tcmalloc property {}", prop); - assert(false); - } - return value; -} -#endif - void 
replica_stub::initialize_start() { // start timer for configuration sync @@ -653,62 +641,10 @@ void replica_stub::initialize_start() _mem_release_timer_task = tasking::enqueue_timer( LPC_MEM_RELEASE, &_tracker, - []() { - ddebug("Memory release has started..."); - int64_t current_allocated_bytes = - get_tcmalloc_property("generic.current_allocated_bytes"); - int64_t pageheap_free_bytes = get_tcmalloc_property("tcmalloc.pageheap_free_bytes"); - int64_t current_total_thread_cache_bytes = - get_tcmalloc_property("tcmalloc.current_total_thread_cache_bytes"); - int64_t thread_cache_free_bytes = - get_tcmalloc_property("tcmalloc.thread_cache_free_bytes"); - int64_t central_cache_free_bytes = - get_tcmalloc_property("tcmalloc.central_cache_free_bytes"); - int64_t transfer_cache_free_bytes = - get_tcmalloc_property("tcmalloc.transfer_cache_free_bytes"); - int64_t max_overhead = current_allocated_bytes * 5 / 100; - bool need_release = pageheap_free_bytes > max_overhead; - - ddebug_f("Memory total_allocated={}, pageheap_free={}", - current_allocated_bytes, - pageheap_free_bytes); - ddebug_f( - "Memory total_thread_cache={}, thread_cache_free={}, central_cache_free={}, " - "transfer_free={}", - current_total_thread_cache_bytes, - thread_cache_free_bytes, - central_cache_free_bytes, - transfer_cache_free_bytes); - ddebug_f("Memory need_release={}", need_release); - - // ::MallocExtension::instance()->ReleaseFreeMemory(); - - int64_t bytes_overhead = pageheap_free_bytes; - if (bytes_overhead > max_overhead) { - int64_t extra = bytes_overhead - max_overhead; - while (extra > 0) { - ::MallocExtension::instance()->ReleaseToSystem(1024 * 1024); - extra -= 1024 * 1024; - } - } - - ddebug("Memory release has ended..."); - int64_t after_release_total_allocated_bytes = - get_tcmalloc_property("generic.current_allocated_bytes"); - int64_t after_release_pageheap_free_bytes = - get_tcmalloc_property("tcmalloc.pageheap_free_bytes"); - ddebug_f("Memory before memory={}, after memory={}, 
total_release={}, before " - "heap_page_free={}, after heap_page_free={}, heap_page_free_gap={}", - current_allocated_bytes, - after_release_total_allocated_bytes, - (after_release_total_allocated_bytes - current_allocated_bytes), - pageheap_free_bytes, - after_release_pageheap_free_bytes, - (pageheap_free_bytes - after_release_pageheap_free_bytes)); - }, - std::chrono::milliseconds(_options.mem_release_interval_ms), + std::bind(&replica_stub::gc_tcmalloc_memory, this), + std::chrono::milliseconds(_options.mem_release_check_interval_ms), 0, - std::chrono::milliseconds(_options.mem_release_interval_ms)); + std::chrono::milliseconds(_options.mem_release_check_interval_ms)); } #endif @@ -2364,6 +2300,48 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str return child_dir; } +#ifdef DSN_ENABLE_GPERF +int64_t replica_stub::get_tcmalloc_numeric_property(const char *prop) +{ + size_t value; + if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) { + derror_f("Failed to get tcmalloc property {}", prop); + return -1; + } + return value; +} + +void replica_stub::gc_tcmalloc_memory() +{ + int64_t total_allocated_bytes = + get_tcmalloc_numeric_property("generic.current_allocated_bytes"); + int64_t reserved_bytes = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes"); + // TODO(heyuchen): delete it + int64_t pageheap_free_before = reserved_bytes; + + int64_t max_reserved_bytes = total_allocated_bytes * + _options.mem_release_tcmalloc_max_reserved_memory_percentage / + 100.0; + if (reserved_bytes > max_reserved_bytes) { + int64_t release_bytes = reserved_bytes - max_reserved_bytes; + ddebug_f("Memory release started, almost {} bytes will be released", release_bytes); + while (release_bytes > 0) { + // tcmalloc release memory will lock page heap, release 1MB at a time will shorten + // locked time + ::MallocExtension::instance()->ReleaseToSystem(1024 * 1024); + release_bytes -= 1024 * 1024; + } + } + // TODO(heyuchen): 
delete it + int64_t pageheap_free_after = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes"); + ddebug_f("total={}, reserved={}, need_release={}, heappage_free={}", + total_allocated_bytes, + pageheap_free_before, + pageheap_free_before > max_reserved_bytes, + pageheap_free_before - pageheap_free_after); +} +#endif + // // partition split // diff --git a/src/dist/replication/lib/replica_stub.h b/src/dist/replication/lib/replica_stub.h index a11288a78d..658a3f755a 100644 --- a/src/dist/replication/lib/replica_stub.h +++ b/src/dist/replication/lib/replica_stub.h @@ -246,6 +246,16 @@ class replica_stub : public serverlet, public ref_counter partition_status::type status, error_code error); +#ifdef DSN_ENABLE_GPERF + // Get tcmalloc numeric property (name is "prop") value. + // Return -1 if get property failed (property we used will be greater than zero) + // Properties can be found in 'gperftools/malloc_extension.h' + int64_t get_tcmalloc_numeric_property(const char *prop); + + // Try to release tcmalloc memory back to operating system + void gc_tcmalloc_memory(); +#endif + private: friend class ::dsn::replication::replication_checker; friend class ::dsn::replication::test::test_checker; From a68b82294d63981a2ea3cebb0dea7d26ee03eae8 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Mon, 25 Nov 2019 18:22:30 +0800 Subject: [PATCH 06/10] tcmalloc: make tcmalloc_max_reserved_memory_percentage dynamic config --- src/dist/replication/lib/replica_stub.cpp | 39 +++++++++++++++++++++-- src/dist/replication/lib/replica_stub.h | 2 ++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 2079abcc6b..f9452418c3 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -68,11 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, _query_compact_command(nullptr), _query_app_envs_command(nullptr), 
_useless_dir_reserve_seconds_command(nullptr), + _mem_release_max_reserved_percentage_command(nullptr), _deny_client(false), _verbose_client_log(false), _verbose_commit_log(false), _gc_disk_error_replica_interval_seconds(3600), _gc_disk_garbage_replica_interval_seconds(3600), + _mem_release_tcmalloc_max_reserved_memory_percentage(10), _learn_app_concurrent_count(0), _fs_manager(false) { @@ -317,6 +319,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f _verbose_commit_log = _options.verbose_commit_log_on_start; _gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds; _gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds; + _mem_release_tcmalloc_max_reserved_memory_percentage = + _options.mem_release_tcmalloc_max_reserved_memory_percentage; // clear dirs if need if (clear) { @@ -2049,6 +2053,33 @@ void replica_stub::open_service() } return result; }); + + _mem_release_max_reserved_percentage_command = + dsn::command_manager::instance().register_app_command( + {"mem-release-max-reserved-percentage"}, + "mem-release-max-reserved-percentage [num | DEFAULT]", + "control tcmalloc max reserved but not-used memory percentage", + [this](const std::vector &args) { + std::string result("OK"); + if (args.empty()) { + result = "mem-release-max-reserved-percentage=" + + std::to_string(_mem_release_tcmalloc_max_reserved_memory_percentage); + } else { + if (args[0] == "DEFAULT") { + _mem_release_tcmalloc_max_reserved_memory_percentage = + _options.mem_release_tcmalloc_max_reserved_memory_percentage; + } else { + int32_t percentage = 0; + if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || + percentage >= 100) { + result = std::string("ERR: invalid arguments"); + } else { + _mem_release_tcmalloc_max_reserved_memory_percentage = percentage; + } + } + } + return result; + }); } std::string @@ -2174,6 +2205,8 @@ void replica_stub::close() 
dsn::command_manager::instance().deregister_command(_query_compact_command); dsn::command_manager::instance().deregister_command(_query_app_envs_command); dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command); + dsn::command_manager::instance().deregister_command( + _mem_release_max_reserved_percentage_command); _kill_partition_command = nullptr; _deny_client_command = nullptr; @@ -2183,6 +2216,7 @@ void replica_stub::close() _query_compact_command = nullptr; _query_app_envs_command = nullptr; _useless_dir_reserve_seconds_command = nullptr; + _mem_release_max_reserved_percentage_command = nullptr; if (_config_sync_timer_task != nullptr) { _config_sync_timer_task->cancel(true); @@ -2319,9 +2353,8 @@ void replica_stub::gc_tcmalloc_memory() // TODO(heyuchen): delete it int64_t pageheap_free_before = reserved_bytes; - int64_t max_reserved_bytes = total_allocated_bytes * - _options.mem_release_tcmalloc_max_reserved_memory_percentage / - 100.0; + int64_t max_reserved_bytes = + total_allocated_bytes * _mem_release_tcmalloc_max_reserved_memory_percentage / 100.0; if (reserved_bytes > max_reserved_bytes) { int64_t release_bytes = reserved_bytes - max_reserved_bytes; ddebug_f("Memory release started, almost {} bytes will be released", release_bytes); diff --git a/src/dist/replication/lib/replica_stub.h b/src/dist/replication/lib/replica_stub.h index 658a3f755a..a5601ac8fb 100644 --- a/src/dist/replication/lib/replica_stub.h +++ b/src/dist/replication/lib/replica_stub.h @@ -315,12 +315,14 @@ class replica_stub : public serverlet, public ref_counter dsn_handle_t _query_compact_command; dsn_handle_t _query_app_envs_command; dsn_handle_t _useless_dir_reserve_seconds_command; + dsn_handle_t _mem_release_max_reserved_percentage_command; bool _deny_client; bool _verbose_client_log; bool _verbose_commit_log; int32_t _gc_disk_error_replica_interval_seconds; int32_t _gc_disk_garbage_replica_interval_seconds; + int32_t 
_mem_release_tcmalloc_max_reserved_memory_percentage; // we limit LT_APP max concurrent count, because nfs service implementation is // too simple, it do not support priority. From 2f04d1307a50da23ff2ed925b31ba01e531b399d Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 28 Nov 2019 10:24:05 +0800 Subject: [PATCH 07/10] small fix --- .../replication/common/replication_common.cpp | 2 +- src/dist/replication/lib/replica_stub.cpp | 16 +++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/dist/replication/common/replication_common.cpp b/src/dist/replication/common/replication_common.cpp index d1d891ea26..5a03831d81 100644 --- a/src/dist/replication/common/replication_common.cpp +++ b/src/dist/replication/common/replication_common.cpp @@ -100,7 +100,7 @@ replication_options::replication_options() config_sync_interval_ms = 30000; mem_release_enabled = true; - mem_release_check_interval_ms = 86400000; + mem_release_check_interval_ms = 3600000; mem_release_tcmalloc_max_reserved_memory_percentage = 10; lb_interval_ms = 10000; diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index f9452418c3..261f7b25a6 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -2350,8 +2350,9 @@ void replica_stub::gc_tcmalloc_memory() int64_t total_allocated_bytes = get_tcmalloc_numeric_property("generic.current_allocated_bytes"); int64_t reserved_bytes = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes"); - // TODO(heyuchen): delete it - int64_t pageheap_free_before = reserved_bytes; + if (total_allocated_bytes == -1 || reserved_bytes == -1) { + return; + } int64_t max_reserved_bytes = total_allocated_bytes * _mem_release_tcmalloc_max_reserved_memory_percentage / 100.0; @@ -2359,19 +2360,12 @@ void replica_stub::gc_tcmalloc_memory() int64_t release_bytes = reserved_bytes - max_reserved_bytes; ddebug_f("Memory release started, almost {} bytes will be 
released", release_bytes); while (release_bytes > 0) { - // tcmalloc release memory will lock page heap, release 1MB at a time will shorten - // locked time + // tcmalloc releasing memory will lock page heap, release 1MB at a time to avoid locking + // page heap for long time ::MallocExtension::instance()->ReleaseToSystem(1024 * 1024); release_bytes -= 1024 * 1024; } } - // TODO(heyuchen): delete it - int64_t pageheap_free_after = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes"); - ddebug_f("total={}, reserved={}, need_release={}, heappage_free={}", - total_allocated_bytes, - pageheap_free_before, - pageheap_free_before > max_reserved_bytes, - pageheap_free_before - pageheap_free_after); } #endif From 0780df422937bceb840327ce94befd32f55f7f20 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 28 Nov 2019 14:39:51 +0800 Subject: [PATCH 08/10] small fix --- .../replication/common/replication_common.cpp | 8 +-- .../replication/common/replication_common.h | 2 +- src/dist/replication/lib/replica_stub.cpp | 64 +++++++++---------- src/dist/replication/lib/replica_stub.h | 4 +- 4 files changed, 38 insertions(+), 40 deletions(-) diff --git a/src/dist/replication/common/replication_common.cpp b/src/dist/replication/common/replication_common.cpp index 5a03831d81..82c260fbf8 100644 --- a/src/dist/replication/common/replication_common.cpp +++ b/src/dist/replication/common/replication_common.cpp @@ -101,7 +101,7 @@ replication_options::replication_options() mem_release_enabled = true; mem_release_check_interval_ms = 3600000; - mem_release_tcmalloc_max_reserved_memory_percentage = 10; + mem_release_max_reserved_mem_percentage = 10; lb_interval_ms = 10000; @@ -486,10 +486,10 @@ void replication_options::initialize() mem_release_check_interval_ms, "the replica check if should release memory to the system every this period of time(ms)"); - mem_release_tcmalloc_max_reserved_memory_percentage = (int)dsn_config_get_value_uint64( + mem_release_max_reserved_mem_percentage = 
(int)dsn_config_get_value_uint64( "replication", - "mem_release_tcmalloc_max_reserved_memory_percentage", - mem_release_tcmalloc_max_reserved_memory_percentage, + "mem_release_max_reserved_mem_percentage", + mem_release_max_reserved_mem_percentage, "if tcmalloc reserved but not-used memory exceed this percentage of application allocated " "memory, replica server will release the exceeding memory back to operating system"); diff --git a/src/dist/replication/common/replication_common.h b/src/dist/replication/common/replication_common.h index bd81f4c2ed..502f6c95c1 100644 --- a/src/dist/replication/common/replication_common.h +++ b/src/dist/replication/common/replication_common.h @@ -105,7 +105,7 @@ class replication_options bool mem_release_enabled; int32_t mem_release_check_interval_ms; - int32_t mem_release_tcmalloc_max_reserved_memory_percentage; + int32_t mem_release_max_reserved_mem_percentage; int32_t lb_interval_ms; diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 261f7b25a6..3e0fabe403 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -68,13 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, _query_compact_command(nullptr), _query_app_envs_command(nullptr), _useless_dir_reserve_seconds_command(nullptr), - _mem_release_max_reserved_percentage_command(nullptr), + _max_reserved_memory_percentage_command(nullptr), _deny_client(false), _verbose_client_log(false), _verbose_commit_log(false), _gc_disk_error_replica_interval_seconds(3600), _gc_disk_garbage_replica_interval_seconds(3600), - _mem_release_tcmalloc_max_reserved_memory_percentage(10), + _mem_release_max_reserved_mem_percentage(10), _learn_app_concurrent_count(0), _fs_manager(false) { @@ -319,8 +319,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f _verbose_commit_log = _options.verbose_commit_log_on_start; 
_gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds; _gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds; - _mem_release_tcmalloc_max_reserved_memory_percentage = - _options.mem_release_tcmalloc_max_reserved_memory_percentage; + _mem_release_max_reserved_mem_percentage = _options.mem_release_max_reserved_mem_percentage; // clear dirs if need if (clear) { @@ -2054,32 +2053,32 @@ void replica_stub::open_service() return result; }); - _mem_release_max_reserved_percentage_command = - dsn::command_manager::instance().register_app_command( - {"mem-release-max-reserved-percentage"}, - "mem-release-max-reserved-percentage [num | DEFAULT]", - "control tcmalloc max reserved but not-used memory percentage", - [this](const std::vector &args) { - std::string result("OK"); - if (args.empty()) { - result = "mem-release-max-reserved-percentage=" + - std::to_string(_mem_release_tcmalloc_max_reserved_memory_percentage); - } else { - if (args[0] == "DEFAULT") { - _mem_release_tcmalloc_max_reserved_memory_percentage = - _options.mem_release_tcmalloc_max_reserved_memory_percentage; - } else { - int32_t percentage = 0; - if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || - percentage >= 100) { - result = std::string("ERR: invalid arguments"); - } else { - _mem_release_tcmalloc_max_reserved_memory_percentage = percentage; - } - } - } + _max_reserved_memory_percentage_command = dsn::command_manager::instance().register_app_command( + {"mem-release-max-reserved-percentage"}, + "mem-release-max-reserved-percentage [num | DEFAULT]", + "control tcmalloc max reserved but not-used memory percentage", + [this](const std::vector &args) { + std::string result("OK"); + if (args.empty()) { + // show current value + result = "mem-release-max-reserved-percentage = " + + std::to_string(_mem_release_max_reserved_mem_percentage); return result; - }); + } + if (args[0] == "DEFAULT") { + // set to default value + 
_mem_release_max_reserved_mem_percentage = + _options.mem_release_max_reserved_mem_percentage; + return result; + } + int32_t percentage = 0; + if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || percentage >= 100) { + result = std::string("ERR: invalid arguments"); + } else { + _mem_release_max_reserved_mem_percentage = percentage; + } + return result; + }); } std::string @@ -2205,8 +2204,7 @@ void replica_stub::close() dsn::command_manager::instance().deregister_command(_query_compact_command); dsn::command_manager::instance().deregister_command(_query_app_envs_command); dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command); - dsn::command_manager::instance().deregister_command( - _mem_release_max_reserved_percentage_command); + dsn::command_manager::instance().deregister_command(_max_reserved_memory_percentage_command); _kill_partition_command = nullptr; _deny_client_command = nullptr; @@ -2216,7 +2214,7 @@ void replica_stub::close() _query_compact_command = nullptr; _query_app_envs_command = nullptr; _useless_dir_reserve_seconds_command = nullptr; - _mem_release_max_reserved_percentage_command = nullptr; + _max_reserved_memory_percentage_command = nullptr; if (_config_sync_timer_task != nullptr) { _config_sync_timer_task->cancel(true); @@ -2355,7 +2353,7 @@ void replica_stub::gc_tcmalloc_memory() } int64_t max_reserved_bytes = - total_allocated_bytes * _mem_release_tcmalloc_max_reserved_memory_percentage / 100.0; + total_allocated_bytes * _mem_release_max_reserved_mem_percentage / 100.0; if (reserved_bytes > max_reserved_bytes) { int64_t release_bytes = reserved_bytes - max_reserved_bytes; ddebug_f("Memory release started, almost {} bytes will be released", release_bytes); diff --git a/src/dist/replication/lib/replica_stub.h b/src/dist/replication/lib/replica_stub.h index a5601ac8fb..1fb6518b67 100644 --- a/src/dist/replication/lib/replica_stub.h +++ b/src/dist/replication/lib/replica_stub.h @@ -315,14 +315,14 @@ 
class replica_stub : public serverlet, public ref_counter dsn_handle_t _query_compact_command; dsn_handle_t _query_app_envs_command; dsn_handle_t _useless_dir_reserve_seconds_command; - dsn_handle_t _mem_release_max_reserved_percentage_command; + dsn_handle_t _max_reserved_memory_percentage_command; bool _deny_client; bool _verbose_client_log; bool _verbose_commit_log; int32_t _gc_disk_error_replica_interval_seconds; int32_t _gc_disk_garbage_replica_interval_seconds; - int32_t _mem_release_tcmalloc_max_reserved_memory_percentage; + int32_t _mem_release_max_reserved_mem_percentage; // we limit LT_APP max concurrent count, because nfs service implementation is // too simple, it do not support priority. From 0a4cdf58843235c65c6c7ed3eb5d0eb587d66a2b Mon Sep 17 00:00:00 2001 From: heyuchen Date: Fri, 29 Nov 2019 09:47:37 +0800 Subject: [PATCH 09/10] if disable gperf, not register memory percentage command --- src/dist/replication/lib/replica_stub.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index 3e0fabe403..a4af340272 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -2053,6 +2053,7 @@ void replica_stub::open_service() return result; }); +#ifdef DSN_ENABLE_GPERF _max_reserved_memory_percentage_command = dsn::command_manager::instance().register_app_command( {"mem-release-max-reserved-percentage"}, "mem-release-max-reserved-percentage [num | DEFAULT]", @@ -2079,6 +2080,7 @@ void replica_stub::open_service() } return result; }); +#endif } std::string @@ -2204,7 +2206,9 @@ void replica_stub::close() dsn::command_manager::instance().deregister_command(_query_compact_command); dsn::command_manager::instance().deregister_command(_query_app_envs_command); dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command); +#ifdef DSN_ENABLE_GPERF 
dsn::command_manager::instance().deregister_command(_max_reserved_memory_percentage_command); +#endif _kill_partition_command = nullptr; _deny_client_command = nullptr; From 2e2a57227613fbf1dffcd3abfd471fdcc3e8e8c4 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Fri, 29 Nov 2019 15:36:50 +0800 Subject: [PATCH 10/10] small fix --- src/dist/replication/lib/replica_stub.cpp | 5 ++++- src/dist/replication/lib/replica_stub.h | 5 ----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/dist/replication/lib/replica_stub.cpp b/src/dist/replication/lib/replica_stub.cpp index a4af340272..9546f2da2a 100644 --- a/src/dist/replication/lib/replica_stub.cpp +++ b/src/dist/replication/lib/replica_stub.cpp @@ -2337,7 +2337,10 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str } #ifdef DSN_ENABLE_GPERF -int64_t replica_stub::get_tcmalloc_numeric_property(const char *prop) +// Returns the value of the tcmalloc numeric property named "prop". +// Returns -1 if the lookup fails (the properties we query are never negative). +// Property names are listed in 'gperftools/malloc_extension.h'. +static int64_t get_tcmalloc_numeric_property(const char *prop) { size_t value; if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) { diff --git a/src/dist/replication/lib/replica_stub.h b/src/dist/replication/lib/replica_stub.h index 1fb6518b67..5fa505d46f 100644 --- a/src/dist/replication/lib/replica_stub.h +++ b/src/dist/replication/lib/replica_stub.h @@ -247,11 +247,6 @@ class replica_stub : public serverlet, public ref_counter error_code error); #ifdef DSN_ENABLE_GPERF - // Get tcmalloc numeric property (name is "prop") value. - // Return -1 if get property failed (property we used will be greater than zero) - // Properties can be found in 'gperftools/malloc_extension.h' - int64_t get_tcmalloc_numeric_property(const char *prop); - // Try to release tcmalloc memory back to operating system void gc_tcmalloc_memory(); #endif