From cad0dc6848f84a9b0a2256b6d44638d9a6043639 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 16 Dec 2022 10:14:56 -0800 Subject: [PATCH] LookupCache: Optimize cache clearing and allocation Use one large allocation for all levels of the cache so they are virtually contiguous. This allows us to clear the cache entirely by using a single madvise instead of three, which ends up being quite a bit nicer. --- .../Source/Interface/Core/LookupCache.cpp | 19 +++++++++---------- .../Source/Interface/Core/LookupCache.h | 2 ++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/LookupCache.cpp b/External/FEXCore/Source/Interface/Core/LookupCache.cpp index e35d4d73bb..5c58726565 100644 --- a/External/FEXCore/Source/Interface/Core/LookupCache.cpp +++ b/External/FEXCore/Source/Interface/Core/LookupCache.cpp @@ -17,6 +17,8 @@ namespace FEXCore { LookupCache::LookupCache(FEXCore::Context::Context *CTX) : ctx {CTX} { + TotalCacheSize = ctx->Config.VirtualMemSize / 4096 * 8 + CODE_SIZE + L1_SIZE; + // Block cache ends up looking like this // PageMemoryMap[VirtualMemoryRegion >> 12] // | @@ -29,27 +31,26 @@ LookupCache::LookupCache(FEXCore::Context::Context *CTX) // Allocate a region of memory that we can use to back our block pointers // We need one pointer per page of virtual memory // At 64GB of virtual memory this will allocate 128MB of virtual memory space - PagePointer = reinterpret_cast(FEXCore::Allocator::mmap(nullptr, ctx->Config.VirtualMemSize / 4096 * 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + PagePointer = reinterpret_cast(FEXCore::Allocator::mmap(nullptr, TotalCacheSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); // Allocate our memory backing our pages // We need 32KB per guest page (One pointer per byte) // XXX: We can drop down to 16KB if we store 4byte offsets from the code base // We currently limit to 128MB of real memory for caching for the total cache size. 
// Can end up being inefficient if we compile a small number of blocks per page - PageMemory = reinterpret_cast(FEXCore::Allocator::mmap(nullptr, CODE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + PageMemory = PagePointer + ctx->Config.VirtualMemSize / 4096 * 8; LOGMAN_THROW_AA_FMT(PageMemory != -1ULL, "Failed to allocate page memory"); // L1 Cache - L1Pointer = reinterpret_cast(FEXCore::Allocator::mmap(nullptr, L1_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + L1Pointer = PageMemory + CODE_SIZE; LOGMAN_THROW_AA_FMT(L1Pointer != -1ULL, "Failed to allocate L1Pointer"); VirtualMemSize = ctx->Config.VirtualMemSize; } LookupCache::~LookupCache() { - FEXCore::Allocator::munmap(reinterpret_cast(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8); - FEXCore::Allocator::munmap(reinterpret_cast(PageMemory), CODE_SIZE); - FEXCore::Allocator::munmap(reinterpret_cast(L1Pointer), L1_SIZE); + const size_t TotalCacheSize = ctx->Config.VirtualMemSize / 4096 * 8 + CODE_SIZE + L1_SIZE; + FEXCore::Allocator::munmap(reinterpret_cast(PagePointer), TotalCacheSize); } void LookupCache::ClearL2Cache() { @@ -63,10 +64,8 @@ void LookupCache::ClearL2Cache() { void LookupCache::ClearCache() { std::lock_guard lk(WriteLock); - // Clear L1 - madvise(reinterpret_cast(L1Pointer), L1_SIZE, MADV_DONTNEED); - // Clear L2 - ClearL2Cache(); + // Clear L1 and L2 by clearing the full cache. 
+ madvise(reinterpret_cast(PagePointer), TotalCacheSize, MADV_DONTNEED); // All code is gone, remove links BlockLinks.clear(); // All code is gone, clear the block list diff --git a/External/FEXCore/Source/Interface/Core/LookupCache.h b/External/FEXCore/Source/Interface/Core/LookupCache.h index 41e4ec63cd..7ac1575122 100644 --- a/External/FEXCore/Source/Interface/Core/LookupCache.h +++ b/External/FEXCore/Source/Interface/Core/LookupCache.h @@ -242,6 +242,8 @@ class LookupCache { std::map> BlockLinks; tsl::robin_map BlockList; + size_t TotalCacheSize; + constexpr static size_t CODE_SIZE = 128 * 1024 * 1024; constexpr static size_t SIZE_PER_PAGE = 4096 * sizeof(LookupCacheEntry); constexpr static size_t L1_SIZE = L1_ENTRIES * sizeof(LookupCacheEntry);