Skip to content

Commit

Permalink
CPU/CodeCache: Improve block host size heuristics
Browse files Browse the repository at this point in the history
Codegen is much better these days, especially with NewRec.
  • Loading branch information
stenzek committed Dec 3, 2024
1 parent 9a5ee3a commit d3ceda0
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 23 deletions.
36 changes: 22 additions & 14 deletions src/core/cpu_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,10 @@ static u8* s_free_far_code_ptr = nullptr;
static u32 s_far_code_size = 0;
static u32 s_far_code_used = 0;

#if defined(_DEBUG) || defined(_DEVEL)
#ifdef DUMP_CODE_SIZE_STATS
static u32 s_total_instructions_compiled = 0;
static u32 s_total_host_instructions_emitted = 0;
static u32 s_total_host_code_used_by_instructions = 0;
#endif
} // namespace CPU::CodeCache

Expand Down Expand Up @@ -691,7 +692,6 @@ void CPU::CodeCache::InvalidateAllRAMBlocks()

void CPU::CodeCache::ClearBlocks()
{

for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
{
PageProtectionInfo& ppi = s_page_protection[i];
Expand Down Expand Up @@ -1345,10 +1345,13 @@ void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
}

// Ensure we're not going to run out of space while compiling this block.
// We could definitely do better here... TODO: far code is no longer needed for newrec
// We could definitely do better here...
const u32 block_size = static_cast<u32>(s_block_instructions.size());
if (GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
const u32 free_code_space = GetFreeCodeSpace();
const u32 free_far_code_space = GetFreeFarCodeSpace();
if (free_code_space < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
free_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK ||
free_far_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK)
{
ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
CodeCache::Reset();
Expand Down Expand Up @@ -1540,9 +1543,10 @@ void CPU::CodeCache::CompileASMFunctions()
{
MemMap::BeginCodeWrite();

#if defined(_DEBUG) || defined(_DEVEL)
#ifdef DUMP_CODE_SIZE_STATS
s_total_instructions_compiled = 0;
s_total_host_instructions_emitted = 0;
s_total_host_code_used_by_instructions = 0;
#endif

const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace());
Expand Down Expand Up @@ -1580,14 +1584,18 @@ bool CPU::CodeCache::CompileBlock(Block* block)
const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size);
s_total_instructions_compiled += block->size;
s_total_host_instructions_emitted += host_instructions;

DEV_LOG("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}", block->pc,
host_code_size, host_far_code_size, block->size * 4, block->size,
static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
(static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
(static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
static_cast<float>(host_instructions) / static_cast<float>(block->size),
static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled));
s_total_host_code_used_by_instructions += host_code_size;

DEV_LOG(
"0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}, bpi: {:.2f}/{:.2f}",
block->pc, host_code_size, host_far_code_size, block->size * 4, block->size,
static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
(static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
(static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
static_cast<float>(host_instructions) / static_cast<float>(block->size),
static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled),
static_cast<float>(block->host_code_size) / static_cast<float>(block->size),
static_cast<float>(s_total_host_code_used_by_instructions) / static_cast<float>(s_total_instructions_compiled));
#endif

#if 0
Expand Down
21 changes: 12 additions & 9 deletions src/core/cpu_recompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

namespace CPU {

// TODO: Get rid of the virtuals... somehow.
class Recompiler
{
public:
Expand All @@ -26,8 +25,10 @@ class Recompiler
#if defined(CPU_ARCH_X64)

// A reasonable "maximum" number of bytes per instruction.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Host code seems to hover around ~21 bytes per instruction without PGXP, and ~26 bytes with PGXP.
// Use an upper bound of 32 bytes to be safe.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 32;
static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;

// Number of host registers.
static constexpr u32 NUM_HOST_REGS = 16;
Expand All @@ -37,22 +38,24 @@ class Recompiler

// A reasonable "maximum" number of bytes per instruction.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;

// Number of host registers.
static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = false;

#elif defined(CPU_ARCH_ARM64)

// A reasonable "maximum" number of bytes per instruction.
// Host code seems to hover around ~24 bytes per instruction without PGXP, and ~40 bytes with PGXP.
// Use an upper bound of 48 bytes to be safe.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 48;
static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;

// Number of host registers.
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;

// A reasonable "maximum" number of bytes per instruction.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;

#elif defined(CPU_ARCH_RISCV64)

// Number of host registers.
Expand All @@ -61,7 +64,7 @@ class Recompiler

// A reasonable "maximum" number of bytes per instruction.
static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;

#endif

Expand Down

0 comments on commit d3ceda0

Please sign in to comment.