From df623ac5961822e1e67db25a995dc91c5c19c1c2 Mon Sep 17 00:00:00 2001 From: HuaiyuXu <391585975@qq.com> Date: Thu, 30 Nov 2023 18:16:26 +0800 Subject: [PATCH] pkg/util: refine tidb_server_memory_limit to make the cpu usage more stable (#48927) close pingcap/tidb#48741 --- pkg/util/gctuner/memory_limit_tuner.go | 32 ++++-- pkg/util/gctuner/memory_limit_tuner_test.go | 104 +++++++++++++++++++- 2 files changed, 124 insertions(+), 12 deletions(-) diff --git a/pkg/util/gctuner/memory_limit_tuner.go b/pkg/util/gctuner/memory_limit_tuner.go index b226248bcd0a6..28f934f7b3aee 100644 --- a/pkg/util/gctuner/memory_limit_tuner.go +++ b/pkg/util/gctuner/memory_limit_tuner.go @@ -33,9 +33,11 @@ var GlobalMemoryLimitTuner = &memoryLimitTuner{} // So we can change memory limit dynamically to avoid frequent GC when memory usage is greater than the limit. type memoryLimitTuner struct { finalizer *finalizer - isTuning atomicutil.Bool + isValidValueSet atomicutil.Bool percentage atomicutil.Float64 - waitingReset atomicutil.Bool + adjustPercentageInProgress atomicutil.Bool + serverMemLimitBeforeAdjust atomicutil.Uint64 + percentageBeforeAdjust atomicutil.Float64 nextGCTriggeredByMemoryLimit atomicutil.Bool } @@ -56,7 +58,7 @@ func WaitMemoryLimitTunerExitInTest() { // tuning check the memory nextGC and judge whether this GC is trigger by memory limit. // Go runtime ensure that it will be called serially. func (t *memoryLimitTuner) tuning() { - if !t.isTuning.Load() { + if !t.isValidValueSet.Load() { return } r := memory.ForceReadMemStats() @@ -72,7 +74,11 @@ func (t *memoryLimitTuner) tuning() { // - Only if NextGC >= MemoryLimit , the **next** GC will be triggered by MemoryLimit. Thus, we need to reset // MemoryLimit after the **next** GC happens if needed. if float64(r.HeapInuse)*ratio > float64(debug.SetMemoryLimit(-1)) { - if t.nextGCTriggeredByMemoryLimit.Load() && t.waitingReset.CompareAndSwap(false, true) { + if t.nextGCTriggeredByMemoryLimit.Load() && t.adjustPercentageInProgress.CompareAndSwap(false, true) { + // It's ok to update `adjustPercentageInProgress`, `serverMemLimitBeforeAdjust` and `percentageBeforeAdjust` not in a transaction. + // The update of memory limit is eventually consistent. + t.serverMemLimitBeforeAdjust.Store(memory.ServerMemoryLimit.Load()) + t.percentageBeforeAdjust.Store(t.GetPercentage()) go func() { if intest.InTest { memoryGoroutineCntInTest.Inc() @@ -85,6 +91,13 @@ func (t *memoryLimitTuner) tuning() { if intest.InTest { resetInterval = 3 * time.Second } + failpoint.Inject("mockUpdateGlobalVarDuringAdjustPercentage", func(val failpoint.Value) { + if val, ok := val.(bool); val && ok { + resetInterval = 5 * time.Second + time.Sleep(300 * time.Millisecond) + t.UpdateMemoryLimit() + } + }) failpoint.Inject("testMemoryLimitTuner", func(val failpoint.Value) { if val, ok := val.(bool); val && ok { resetInterval = 1 * time.Second @@ -92,7 +105,7 @@ func (t *memoryLimitTuner) tuning() { }) time.Sleep(resetInterval) debug.SetMemoryLimit(t.calcMemoryLimit(t.GetPercentage())) - for !t.waitingReset.CompareAndSwap(true, false) { + for !t.adjustPercentageInProgress.CompareAndSwap(true, false) { continue } }() @@ -128,12 +141,17 @@ func (t *memoryLimitTuner) GetPercentage() float64 { // UpdateMemoryLimit updates the memory limit. // This function should be called when `tidb_server_memory_limit` or `tidb_server_memory_limit_gc_trigger` is modified. func (t *memoryLimitTuner) UpdateMemoryLimit() { + if t.adjustPercentageInProgress.Load() { + if t.serverMemLimitBeforeAdjust.Load() == memory.ServerMemoryLimit.Load() && t.percentageBeforeAdjust.Load() == t.GetPercentage() { + return + } + } var memoryLimit = t.calcMemoryLimit(t.GetPercentage()) if memoryLimit == math.MaxInt64 { - t.isTuning.Store(false) + t.isValidValueSet.Store(false) memoryLimit = initGOMemoryLimitValue } else { - t.isTuning.Store(true) + t.isValidValueSet.Store(true) } debug.SetMemoryLimit(memoryLimit) } diff --git a/pkg/util/gctuner/memory_limit_tuner_test.go b/pkg/util/gctuner/memory_limit_tuner_test.go index ab354530d852c..c8ea2d9724740 100644 --- a/pkg/util/gctuner/memory_limit_tuner_test.go +++ b/pkg/util/gctuner/memory_limit_tuner_test.go @@ -57,16 +57,16 @@ func TestGlobalMemoryTuner(t *testing.T) { memory.ServerMemoryLimit.Store(1 << 30) // 1GB GlobalMemoryLimitTuner.SetPercentage(0.8) // 1GB * 80% = 800MB GlobalMemoryLimitTuner.UpdateMemoryLimit() - require.True(t, GlobalMemoryLimitTuner.isTuning.Load()) + require.True(t, GlobalMemoryLimitTuner.isValidValueSet.Load()) defer func() { // If test.count > 1, wait tuning finished. require.Eventually(t, func() bool { //nolint: all_revive - return GlobalMemoryLimitTuner.isTuning.Load() + return GlobalMemoryLimitTuner.isValidValueSet.Load() }, 5*time.Second, 100*time.Millisecond) require.Eventually(t, func() bool { //nolint: all_revive - return !GlobalMemoryLimitTuner.waitingReset.Load() + return !GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() }, 5*time.Second, 100*time.Millisecond) require.Eventually(t, func() bool { //nolint: all_revive @@ -85,7 +85,7 @@ func TestGlobalMemoryTuner(t *testing.T) { runtime.ReadMemStats(r) nextGC := r.NextGC memoryLimit := GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()) - // In golang source, nextGC = memoryLimit - three parts memory. + // Refer to golang source code, nextGC = memoryLimit - nonHeapMemory - overageMemory - headroom require.True(t, nextGC < uint64(memoryLimit)) } @@ -94,7 +94,7 @@ func TestGlobalMemoryTuner(t *testing.T) { memory210mb := allocator.alloc(210 << 20) require.Eventually(t, func() bool { - return GlobalMemoryLimitTuner.waitingReset.Load() && gcNum < getNowGCNum() + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNum < getNowGCNum() }, 5*time.Second, 100*time.Millisecond) // Test waiting for reset require.Eventually(t, func() bool { @@ -123,3 +123,97 @@ func TestGlobalMemoryTuner(t *testing.T) { allocator.free(memory210mb) allocator.free(memory600mb) } + +func TestIssue48741(t *testing.T) { + // Close GOGCTuner + gogcTuner := EnableGOGCTuner.Load() + EnableGOGCTuner.Store(false) + defer EnableGOGCTuner.Store(gogcTuner) + + r := &runtime.MemStats{} + getNowGCNum := func() uint32 { + runtime.ReadMemStats(r) + return r.NumGC + } + allocator := &mockAllocator{} + defer allocator.freeAll() + + checkIfMemoryLimitIsModified := func() { + memory.ServerMemoryLimit.Store(1500 << 20) // 1.5 GB + + // Try to trigger GC by 1GB * 80% = 800MB (tidb_server_memory_limit * tidb_server_memory_limit_gc_trigger) + gcNum := getNowGCNum() + memory810mb := allocator.alloc(810 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNum < getNowGCNum() + }, + 500*time.Millisecond, 100*time.Millisecond) + + gcNumAfterMemory810mb := getNowGCNum() + // After the GC triggered by memory810mb. + time.Sleep(4500 * time.Millisecond) + require.Equal(t, debug.SetMemoryLimit(-1), int64(1500<<20*80/100)) + + memory700mb := allocator.alloc(200 << 20) + time.Sleep(5 * time.Second) + // The heapInUse is less than 1.5GB * 80% = 1.2GB, so the gc will not be triggered. + require.Equal(t, gcNumAfterMemory810mb, getNowGCNum()) + + memory150mb := allocator.alloc(300 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNumAfterMemory810mb < getNowGCNum() + }, + 5*time.Second, 100*time.Millisecond) + + time.Sleep(4500 * time.Millisecond) + require.Equal(t, debug.SetMemoryLimit(-1), int64(1500<<20*110/100)) + + allocator.free(memory810mb) + allocator.free(memory700mb) + allocator.free(memory150mb) + } + + checkIfMemoryLimitNotModified := func() { + // Try to trigger GC by 1GB * 80% = 800MB (tidb_server_memory_limit * tidb_server_memory_limit_gc_trigger) + gcNum := getNowGCNum() + memory810mb := allocator.alloc(810 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNum < getNowGCNum() + }, + 500*time.Millisecond, 100*time.Millisecond) + + gcNumAfterMemory810mb := getNowGCNum() + // After the GC triggered by memory810mb. + time.Sleep(4500 * time.Millisecond) + // During the process of adjusting the percentage, the memory limit will be set to 1GB * 110% = 1.1GB. + require.Equal(t, debug.SetMemoryLimit(-1), int64(1<<30*110/100)) + + require.Eventually(t, + // The GC will be trigged immediately after memoryLimit is set back to 1GB * 80% = 800MB. + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNumAfterMemory810mb < getNowGCNum() + }, + 2*time.Second, 100*time.Millisecond) + + allocator.free(memory810mb) + } + + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/util/gctuner/mockUpdateGlobalVarDuringAdjustPercentage", "return(true)")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/util/gctuner/mockUpdateGlobalVarDuringAdjustPercentage")) + }() + + memory.ServerMemoryLimit.Store(1 << 30) // 1GB + GlobalMemoryLimitTuner.SetPercentage(0.8) // 1GB * 80% = 800MB + GlobalMemoryLimitTuner.UpdateMemoryLimit() + require.Equal(t, debug.SetMemoryLimit(-1), int64(1<<30*80/100)) + + checkIfMemoryLimitNotModified() + checkIfMemoryLimitIsModified() +}