Skip to content

Commit

Permalink
CPU/Recompiler/AArch32: Load membase on demand
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Oct 21, 2023
1 parent cce1ec5 commit 52e0d8d
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 30 deletions.
31 changes: 16 additions & 15 deletions src/core/cpu_newrec_compiler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,6 @@ void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional<u32>& new
DebugAssert(!m_dirty_pc);

// TODO: try extracting this to a function
// TODO: move the cycle flush in here..

// save cycles for event test
const TickCount cycles = std::exchange(m_cycles, 0);
Expand Down Expand Up @@ -621,7 +620,12 @@ void CPU::NewRec::AArch64Compiler::Flush(u32 flags)
if (flags & FLUSH_INSTRUCTION_BITS)
{
// This sucks, but it's only used for fallbacks.
Panic("Not implemented");
EmitMov(RWARG1, inst->bits);
EmitMov(RWARG2, m_current_instruction_pc);
EmitMov(RWARG3, m_current_instruction_branch_delay_slot);
armAsm->str(RWARG1, PTR(&g_state.current_instruction.bits));
armAsm->str(RWARG2, PTR(&g_state.current_instruction_pc));
armAsm->strb(RWARG3, PTR(&g_state.current_instruction_in_branch_delay_slot));
}

if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
Expand Down Expand Up @@ -699,26 +703,23 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
{
Flush(FLUSH_FOR_INTERPRETER);

#if 0
cg->call(&CPU::Recompiler::Thunks::InterpretInstruction);
EmitCall(armAsm, &CPU::Recompiler::Thunks::InterpretInstruction);

// TODO: make me less garbage
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
// but nothing should be going through here..
Label no_load_delay;
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
cg->cmp(RWARG1, static_cast<u8>(Reg::count));
cg->je(no_load_delay, CodeGenerator::T_SHORT);
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
cg->L(no_load_delay);
armAsm->ldrb(RWARG1, PTR(&g_state.next_load_delay_reg));
armAsm->cmp(RWARG1, static_cast<u8>(Reg::count));
armAsm->b(&no_load_delay, eq);
armAsm->ldr(RWARG2, PTR(&g_state.next_load_delay_value));
armAsm->strb(RWARG1, PTR(&g_state.load_delay_reg));
armAsm->str(RWARG2, PTR(&g_state.load_delay_value));
EmitMov(RWARG1, static_cast<u32>(Reg::count));
armAsm->strb(RWARG1, PTR(&g_state.next_load_delay_reg));
armAsm->bind(&no_load_delay);

m_load_delay_dirty = EMULATE_LOAD_DELAYS;
#else
Panic("Fixme");
#endif
}

void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
Expand Down
2 changes: 2 additions & 0 deletions src/core/cpu_recompiler_code_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class CodeGenerator
const Value& address, RegSize size, const Value& value);
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value, bool in_far_code);
void EnsureMembaseLoaded();
void EmitUpdateFastmemBase();

// Unconditional branch to pointer. May allocate a scratch register.
Expand Down Expand Up @@ -291,6 +292,7 @@ class CodeGenerator
bool m_load_delay_dirty = false;
bool m_next_load_delay_dirty = false;
bool m_gte_busy_cycles_dirty = false;
bool m_membase_loaded = false;

//////////////////////////////////////////////////////////////////////////
// Speculative Constants
Expand Down
32 changes: 18 additions & 14 deletions src/core/cpu_recompiler_code_generator_aarch32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
#undef RARG3
#undef RARG4
#undef RSCRATCH
#undef RMEMBASE
#undef RSTATE

namespace CPU::Recompiler {

constexpr HostReg RCPUPTR = 4;
constexpr HostReg RMEMBASEPTR = 5;
constexpr HostReg RMEMBASEPTR = 3;
constexpr HostReg RRETURN = 0;
constexpr HostReg RARG1 = 0;
constexpr HostReg RARG2 = 1;
Expand Down Expand Up @@ -385,24 +384,13 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
DebugAssert(cpu_reg_allocated);
UNREFERENCED_VARIABLE(cpu_reg_allocated);

// If there's loadstore instructions, preload the fastmem base.
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
}
}
}

void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* jump_to)
{
if (free_registers)
{
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
m_register_cache.FreeHostReg(RMEMBASEPTR);

m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.FreeHostReg(14);
m_register_cache.PopCalleeSavedRegisters(true);
Expand Down Expand Up @@ -1058,6 +1046,7 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
// Asks the register cache to push the caller-saved registers, then marks the fastmem base
// as not loaded so that the next EnsureMembaseLoaded() emits a fresh load of it.
// Always returns 0.
u32 CodeGenerator::PrepareStackForCall()
{
m_register_cache.PushCallerSavedRegisters();
// NOTE(review): presumably the call that follows may clobber the fastmem base register,
// which is why the cached "loaded" state is dropped here - confirm against the register
// assignment for RMEMBASEPTR.
m_membase_loaded = false;
return 0;
}

Expand Down Expand Up @@ -1351,13 +1340,24 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
}
}

void CodeGenerator::EmitUpdateFastmemBase()
void CodeGenerator::EnsureMembaseLoaded()
{
if (m_membase_loaded)
return;

m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
m_membase_loaded = true;
}

// Invalidates the cached fastmem base. No instruction is emitted here; the function only
// clears m_membase_loaded, so the next EnsureMembaseLoaded() call emits a load of
// State::fastmem_base into the fastmem base pointer register.
void CodeGenerator::EmitUpdateFastmemBase()
{
m_membase_loaded = false;
}

void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
EnsureMembaseLoaded();

HostReg address_reg;
if (address.IsConstant())
{
Expand Down Expand Up @@ -1396,6 +1396,8 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result)
{
EnsureMembaseLoaded();

HostReg address_reg;
if (address.IsConstant())
{
Expand Down Expand Up @@ -1538,6 +1540,8 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value)
{
EnsureMembaseLoaded();

Value actual_value = GetValueInHostRegister(value);

HostReg address_reg;
Expand Down
1 change: 0 additions & 1 deletion src/core/cpu_recompiler_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
#define RARG4 vixl::aarch32::r3
#define RSCRATCH vixl::aarch32::r12
#define RSTATE vixl::aarch32::r4
#define RMEMBASE vixl::aarch32::r5

s32 armGetPCDisplacement(const void* current, const void* target);
bool armIsPCDisplacementInImmediateRange(s32 displacement);
Expand Down

0 comments on commit 52e0d8d

Please sign in to comment.