Skip to content

Commit

Permalink
CPU/CodeCache: Purge JitCodeBuffer
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Jun 30, 2024
1 parent be8fbaf commit 9b42ad3
Show file tree
Hide file tree
Showing 12 changed files with 160 additions and 220 deletions.
122 changes: 104 additions & 18 deletions src/core/cpu_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ static BlockInstructionList s_block_instructions;

static void BacklinkBlocks(u32 pc, const void* dst);
static void UnlinkBlockExits(Block* block);
static void ResetCodeBuffer();

static void ClearASMFunctions();
static void CompileASMFunctions();
Expand Down Expand Up @@ -146,7 +147,15 @@ __attribute__((aligned(HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_
static u8* s_code_buffer_ptr = nullptr;
#endif

static JitCodeBuffer s_code_buffer;
static u8* s_code_ptr = nullptr;
static u8* s_free_code_ptr = nullptr;
static u32 s_code_size = 0;
static u32 s_code_used = 0;

static u8* s_far_code_ptr = nullptr;
static u8* s_free_far_code_ptr = nullptr;
static u32 s_far_code_size = 0;
static u32 s_far_code_used = 0;

#ifdef _DEBUG
static u32 s_total_instructions_compiled = 0;
Expand Down Expand Up @@ -206,10 +215,9 @@ void CPU::CodeCache::Initialize()
{
Assert(s_blocks.empty());

// TODO: Reduce far code size when not using memory exceptions.
if (IsUsingAnyRecompiler())
{
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
ResetCodeBuffer();
CompileASMFunctions();
ResetCodeLUT();
}
Expand All @@ -234,7 +242,7 @@ void CPU::CodeCache::Reset()
if (IsUsingAnyRecompiler())
{
ClearASMFunctions();
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
ResetCodeBuffer();
CompileASMFunctions();
ResetCodeLUT();
}
Expand Down Expand Up @@ -1331,8 +1339,8 @@ void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
// Ensure we're not going to run out of space while compiling this block.
// We could definitely do better here... TODO: far code is no longer needed for newrec
const u32 block_size = static_cast<u32>(s_block_instructions.size());
if (s_code_buffer.GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
s_code_buffer.GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
if (GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
{
ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
CodeCache::Reset();
Expand Down Expand Up @@ -1420,9 +1428,86 @@ void CPU::CodeCache::UnlinkBlockExits(Block* block)
block->num_exit_links = 0;
}

JitCodeBuffer& CPU::CodeCache::GetCodeBuffer()
/// Rewinds both code allocators to an empty state and clears the backing buffer.
///
/// The single backing buffer is split in two: near code takes the first
/// (RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE) bytes, and far code
/// takes the remaining RECOMPILER_FAR_CODE_CACHE_SIZE bytes directly after it.
void CPU::CodeCache::ResetCodeBuffer()
{
  // Near code allocator: starts at the very beginning of the buffer.
  s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_code_used = 0;
  s_code_ptr = s_code_buffer_ptr;
  s_free_code_ptr = s_code_ptr;

  // Far code allocator: begins where near code ends, or is absent when its size is zero.
  s_far_code_size = RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_far_code_used = 0;
  if (s_far_code_size > 0)
    s_far_code_ptr = s_code_ptr + s_code_size;
  else
    s_far_code_ptr = nullptr;
  s_free_far_code_ptr = s_far_code_ptr;

  // Wipe the whole buffer (near and far) and flush the instruction cache over it,
  // all inside a single code-write bracket.
  MemMap::BeginCodeWrite();

  std::memset(s_code_ptr, 0, RECOMPILER_CODE_CACHE_SIZE);
  MemMap::FlushInstructionCache(s_code_ptr, RECOMPILER_CODE_CACHE_SIZE);

  MemMap::EndCodeWrite();
}

u8* CPU::CodeCache::GetFreeCodePointer()
{
return s_code_buffer;
return s_free_code_ptr;
}

/// Returns the number of bytes still unallocated in the near code region.
u32 CPU::CodeCache::GetFreeCodeSpace()
{
  const u32 remaining_bytes = s_code_size - s_code_used;
  return remaining_bytes;
}

/// Marks `length` bytes starting at the free near-code pointer as used.
///
/// Flushes the instruction cache over the committed range, then advances the free
/// pointer and the used counter. A zero-length commit is a no-op.
///
/// @param length Number of bytes to commit; must not exceed the remaining near code space.
void CPU::CodeCache::CommitCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  // Check the bound first, so an oversized commit is caught before the flush is
  // handed a range that runs past the end of the near code region.
  Assert(length <= (s_code_size - s_code_used));

  MemMap::FlushInstructionCache(s_free_code_ptr, length);

  s_free_code_ptr += length;
  s_code_used += length;
}

/// Returns the current write position of the far code allocator.
/// This is nullptr after a reset in which the far code size was zero.
u8* CPU::CodeCache::GetFreeFarCodePointer()
{
  u8* const next_far_byte = s_free_far_code_ptr;
  return next_far_byte;
}

/// Returns the number of bytes still unallocated in the far code region.
u32 CPU::CodeCache::GetFreeFarCodeSpace()
{
  const u32 remaining_bytes = s_far_code_size - s_far_code_used;
  return remaining_bytes;
}

/// Marks `length` bytes starting at the free far-code pointer as used.
///
/// Flushes the instruction cache over the committed range, then advances the free
/// pointer and the used counter. A zero-length commit is a no-op.
///
/// @param length Number of bytes to commit; must not exceed the remaining far code space.
void CPU::CodeCache::CommitFarCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  // Check the bound first, so an oversized commit is caught before the flush is
  // handed a range that runs past the end of the far code region.
  Assert(length <= (s_far_code_size - s_far_code_used));

  MemMap::FlushInstructionCache(s_free_far_code_ptr, length);

  s_free_far_code_ptr += length;
  s_far_code_used += length;
}

/// Advances the free near-code pointer up to the next multiple of `alignment`,
/// filling the skipped bytes with a padding value (0xcc on x64, 0x00 elsewhere).
///
/// The padding is clamped to the remaining near code space, so the pointer may end
/// up unaligned when the region is almost full.
///
/// @param alignment Required alignment in bytes; must be a power of two.
void CPU::CodeCache::AlignCode(u32 alignment)
{
#ifdef CPU_ARCH_X64
  constexpr u8 fill_byte = 0xcc; // int3
#else
  constexpr u8 fill_byte = 0x00;
#endif

  DebugAssert(Common::IsPow2(alignment));

  // Distance from the current write position to the next aligned address.
  const uintptr_t current_addr = reinterpret_cast<uintptr_t>(s_free_code_ptr);
  const auto aligned_addr = Common::AlignUpPow2(current_addr, alignment);
  const u32 bytes_to_boundary = static_cast<u32>(aligned_addr - current_addr);

  // Never pad beyond what is left in the near code region.
  const u32 pad_length = std::min(bytes_to_boundary, GetFreeCodeSpace());

  std::memset(s_free_code_ptr, fill_byte, pad_length);
  s_code_used += pad_length;
  s_free_code_ptr += pad_length;
}

const void* CPU::CodeCache::GetInterpretUncachedBlockFunction()
Expand Down Expand Up @@ -1460,13 +1545,13 @@ void CPU::CodeCache::CompileASMFunctions()
{
MemMap::BeginCodeWrite();

const u32 asm_size = EmitASMFunctions(s_code_buffer.GetFreeCodePointer(), s_code_buffer.GetFreeCodeSpace());
const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace());

#ifdef ENABLE_RECOMPILER_PROFILING
MIPSPerfScope.Register(s_code_buffer.GetFreeCodePointer(), asm_size, "ASMFunctions");
MIPSPerfScope.Register(GetFreeCodePointer(), asm_size, "ASMFunctions");
#endif

s_code_buffer.CommitCode(asm_size);
CommitCode(asm_size);
MemMap::EndCodeWrite();
}

Expand All @@ -1479,7 +1564,7 @@ bool CPU::CodeCache::CompileBlock(Block* block)
#ifdef ENABLE_RECOMPILER
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
{
Recompiler::CodeGenerator codegen(&s_code_buffer);
Recompiler::CodeGenerator codegen;
host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
}
#endif
Expand All @@ -1503,12 +1588,13 @@ bool CPU::CodeCache::CompileBlock(Block* block)
s_total_instructions_compiled += block->size;
s_total_host_instructions_emitted += host_instructions;

Log_ProfileFmt("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}",
block->pc, host_code_size, host_far_code_size, block->size * 4, block->size,
static_cast<float>(host_code_size) / static_cast<float>(block->size * 4), s_code_buffer.GetUsedPct(),
s_code_buffer.GetFarUsedPct(), static_cast<float>(host_instructions) / static_cast<float>(block->size),
static_cast<float>(s_total_host_instructions_emitted) /
static_cast<float>(s_total_instructions_compiled));
DEV_LOG("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}", block->pc,
host_code_size, host_far_code_size, block->size * 4, block->size,
static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
(static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
(static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
static_cast<float>(host_instructions) / static_cast<float>(block->size),
static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled));
#endif

#if 0
Expand Down
16 changes: 14 additions & 2 deletions src/core/cpu_code_cache_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "cpu_core_private.h"
#include "cpu_types.h"

#include "util/jit_code_buffer.h"
#include "util/page_fault_handler.h"

#include <array>
Expand Down Expand Up @@ -234,7 +233,20 @@ void LogCurrentState();
#define ENABLE_HOST_DISASSEMBLY 1
#endif

JitCodeBuffer& GetCodeBuffer();
/// Access to normal code allocator.
/// GetFreeCodePointer() returns the current write position in the near code region.
u8* GetFreeCodePointer();
/// Returns the number of bytes remaining in the near code region (size minus used).
u32 GetFreeCodeSpace();
/// Commits `length` bytes at the free pointer: flushes the instruction cache for that range
/// and advances the free pointer. A zero length is a no-op; length must not exceed the free space.
void CommitCode(u32 length);

/// Access to far code allocator.
/// GetFreeFarCodePointer() returns the current write position in the far code region.
u8* GetFreeFarCodePointer();
/// Returns the number of bytes remaining in the far code region (size minus used).
u32 GetFreeFarCodeSpace();
/// Commits `length` bytes at the free far pointer: flushes the instruction cache for that range
/// and advances the free far pointer. A zero length is a no-op; length must not exceed the free space.
void CommitFarCode(u32 length);

/// Adjusts the free code pointer to the specified alignment, padding with bytes.
/// Assumes alignment is a power-of-two.
/// Only the normal (near) code pointer is adjusted, and the padding is limited to the
/// remaining near code space.
void AlignCode(u32 alignment);

const void* GetInterpretUncachedBlockFunction();

void CompileOrRevalidateBlock(u32 start_pc);
Expand Down
20 changes: 9 additions & 11 deletions src/core/cpu_newrec_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,8 @@ void CPU::NewRec::Compiler::BeginBlock()

const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size)
{
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
Reset(block, buffer.GetFreeCodePointer(), buffer.GetFreeCodeSpace(), buffer.GetFreeFarCodePointer(),
buffer.GetFreeFarCodeSpace());
Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(),
CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace());

DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);

Expand Down Expand Up @@ -141,8 +140,8 @@ const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* ho
const void* code = EndCompile(&code_size, &far_code_size);
*host_code_size = code_size;
*host_far_code_size = far_code_size;
buffer.CommitCode(code_size);
buffer.CommitFarCode(far_code_size);
CPU::CodeCache::CommitCode(code_size);
CPU::CodeCache::CommitFarCode(far_code_size);

return code;
}
Expand Down Expand Up @@ -2341,21 +2340,20 @@ void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::Loadst
static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));

JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
void* thunk_address = buffer.GetFreeFarCodePointer();
void* thunk_address = CPU::CodeCache::GetFreeFarCodePointer();
const u32 thunk_size = CompileLoadStoreThunk(
thunk_address, buffer.GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove,
thunk_address, CPU::CodeCache::GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove,
info.gpr_bitmask, info.address_register, info.data_register, info.AccessSize(), info.is_signed, info.is_load);

#if 0
Log_DebugPrint("**Backpatch Thunk**");
CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size);
CPU::CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size);
#endif

// backpatch to a jump to the slowmem handler
CodeCache::EmitJump(exception_pc, thunk_address, true);
CPU::CodeCache::EmitJump(exception_pc, thunk_address, true);

buffer.CommitFarCode(thunk_size);
CPU::CodeCache::CommitFarCode(thunk_size);
}

void CPU::NewRec::Compiler::InitSpeculativeRegs()
Expand Down
7 changes: 1 addition & 6 deletions src/core/cpu_recompiler_code_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#include <utility>
#include <vector>

#include "util/jit_code_buffer.h"

#include "cpu_code_cache_private.h"
#include "cpu_recompiler_register_cache.h"
#include "cpu_recompiler_thunks.h"
Expand Down Expand Up @@ -49,11 +47,10 @@ class CodeGenerator
const CodeCache::InstructionInfo* info;
};

CodeGenerator(JitCodeBuffer* code_buffer);
CodeGenerator();
~CodeGenerator();

static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);

static void BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi);

Expand Down Expand Up @@ -267,8 +264,6 @@ class CodeGenerator
bool Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info);

JitCodeBuffer* m_code_buffer;

CodeCache::Block* m_block = nullptr;
CodeBlockInstruction m_block_start = {};
CodeBlockInstruction m_block_end = {};
Expand Down
36 changes: 15 additions & 21 deletions src/core/cpu_recompiler_code_generator_aarch32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,11 @@ static const a32::Register GetFastmemBasePtrReg()
return GetHostReg32(RMEMBASEPTR);
}

CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
a32::A32),
m_far_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeFarCodePointer()), code_buffer->GetFreeFarCodeSpace(),
a32::A32),
CodeGenerator::CodeGenerator()
: m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
CPU::CodeCache::GetFreeCodeSpace(), a32::A32),
m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32),
m_emit(&m_near_emitter)
{
InitHostRegs();
Expand All @@ -387,11 +386,6 @@ const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPoin
}
}

void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer)
{
code_buffer->Align(16, 0x90);
}

void CodeGenerator::InitHostRegs()
{
// allocate nonvolatile before volatile
Expand All @@ -414,17 +408,17 @@ void CodeGenerator::SwitchToNearCode()

void* CodeGenerator::GetStartNearCodePointer() const
{
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer());
return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer());
}

void* CodeGenerator::GetCurrentNearCodePointer() const
{
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
}

void* CodeGenerator::GetCurrentFarCodePointer() const
{
return static_cast<u8*>(m_code_buffer->GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
}

Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
Expand Down Expand Up @@ -517,17 +511,17 @@ const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_
m_near_emitter.FinalizeCode();
m_far_emitter.FinalizeCode();

const void* code = m_code_buffer->GetFreeCodePointer();
const void* code = CPU::CodeCache::GetFreeCodePointer();
*out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
*out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());

m_code_buffer->CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
m_code_buffer->CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
CPU::CodeCache::CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
CPU::CodeCache::CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));

m_near_emitter = CodeEmitter(static_cast<vixl::byte*>(m_code_buffer->GetFreeCodePointer()),
m_code_buffer->GetFreeCodeSpace(), a32::A32);
m_far_emitter = CodeEmitter(static_cast<vixl::byte*>(m_code_buffer->GetFreeFarCodePointer()),
m_code_buffer->GetFreeFarCodeSpace(), a32::A32);
m_near_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
CPU::CodeCache::GetFreeCodeSpace(), a32::A32);
m_far_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32);

return code;
}
Expand Down
Loading

0 comments on commit 9b42ad3

Please sign in to comment.