Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiinstance jit #2201

Merged
merged 9 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 19 additions & 21 deletions src/ARMJIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "ARMJIT_Internal.h"
#include "ARMJIT_Memory.h"
#include "ARMJIT_Compiler.h"
#include "ARMJIT_Global.h"

#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_LoadStore.h"
Expand Down Expand Up @@ -467,6 +468,16 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
};
#undef F

// Constructs the JIT state for the given console.
//
// nds: the console instance; it is stored by reference and also forwarded to
//      the Memory and JITCompiler members.
// jit: optional JIT settings. When absent, the maximum block size is 32 and
//      every optimization (including fast memory) is disabled.
//
// The requested block size is clamped to [1, 32]. Fast memory is only enabled
// when it was requested and ARMJIT_Memory reports that it is supported.
ARMJIT::ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept
    : NDS(nds)
    , Memory(nds)
    , JITCompiler(nds)
    , MaxBlockSize(jit ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32u)
    , LiteralOptimizations(jit && jit->LiteralOptimizations)
    , BranchOptimizations(jit && jit->BranchOptimizations)
    , FastMemory(jit && jit->FastMemory && ARMJIT_Memory::IsFastMemSupported())
{
}

void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
{
auto it = RestoreCandidates.find(block->InstrHash);
Expand All @@ -483,6 +494,7 @@ void ARMJIT::RetireJitBlock(JitBlock* block) noexcept

void ARMJIT::SetJITArgs(JITArgs args) noexcept
{
args.FastMemory = args.FastMemory && ARMJIT_Memory::IsFastMemSupported();
args.MaxBlockSize = std::clamp(args.MaxBlockSize, 1u, 32u);

if (MaxBlockSize != args.MaxBlockSize
Expand All @@ -499,36 +511,22 @@ void ARMJIT::SetJITArgs(JITArgs args) noexcept

// Changes the maximum number of instructions compiled into one JIT block.
//
// size: requested block size; it is clamped to [1, 32] while still signed so
//       that a negative request becomes 1 rather than wrapping around when it
//       is converted to unsigned for JITArgs.
//
// The block cache is reset when the effective size changes. All other JIT
// settings are forwarded to SetJITArgs unchanged.
void ARMJIT::SetMaxBlockSize(int size) noexcept
{
    size = std::clamp(size, 1, 32);

    if (size != MaxBlockSize)
        ResetBlockCache();

    MaxBlockSize = size;

    // The third field is the branch-optimization flag; it must carry the
    // current BranchOptimizations value, not a second copy of
    // LiteralOptimizations, otherwise this setter silently rewrites it.
    SetJITArgs(JITArgs{static_cast<unsigned>(size), LiteralOptimizations, BranchOptimizations, FastMemory});
}

void ARMJIT::SetLiteralOptimizations(bool enabled) noexcept
{
if (LiteralOptimizations != enabled)
ResetBlockCache();

LiteralOptimizations = enabled;
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), enabled, BranchOptimizations, FastMemory});
}

void ARMJIT::SetBranchOptimizations(bool enabled) noexcept
{
if (BranchOptimizations != enabled)
ResetBlockCache();

BranchOptimizations = enabled;
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, enabled, FastMemory});
}

void ARMJIT::SetFastMemory(bool enabled) noexcept
{
if (FastMemory != enabled)
ResetBlockCache();

FastMemory = enabled;
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, BranchOptimizations, enabled});
}

void ARMJIT::CompileBlock(ARM* cpu) noexcept
Expand Down Expand Up @@ -918,7 +916,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept

AddressRange* region = CodeMemRegions[addressRanges[j] >> 27];

if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000) / 512]))
if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
Memory.SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true);

AddressRange* range = &region[(addressRanges[j] & 0x7FFFFFF) / 512];
Expand Down Expand Up @@ -971,7 +969,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept
range->Blocks.Remove(i);

if (range->Blocks.Length == 0
&& !PageContainsCode(&region[(localAddr & 0x7FFF000) / 512]))
&& !PageContainsCode(&region[(localAddr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
{
Memory.SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false);
}
Expand Down Expand Up @@ -1005,7 +1003,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept

if (otherRange->Blocks.Length == 0)
{
if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512]))
if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
Memory.SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false);

otherRange->Code = 0;
Expand Down
11 changes: 2 additions & 9 deletions src/ARMJIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,7 @@ class JitBlock;
class ARMJIT
{
public:
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
NDS(nds),
Memory(nds),
JITCompiler(nds),
MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
FastMemory(jit.has_value() ? jit->FastMemory : false)
{}
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept;
~ARMJIT() noexcept;
void InvalidateByAddr(u32) noexcept;
void CheckAndInvalidateWVRAM(int) noexcept;
Expand Down Expand Up @@ -80,6 +72,7 @@ class ARMJIT
bool LiteralOptimizations = false;
bool BranchOptimizations = false;
bool FastMemory = false;

public:
melonDS::NDS& NDS;
TinyVector<u32> InvalidLiterals {};
Expand Down
49 changes: 10 additions & 39 deletions src/ARMJIT_A64/ARMJIT_Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,7 @@
#include "../ARMInterpreter.h"
#include "../ARMJIT.h"
#include "../NDS.h"

#if defined(__SWITCH__)
#include <switch.h>

extern char __start__;
#elif defined(_WIN32)
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
#include "../ARMJIT_Global.h"

#include <stdlib.h>

Expand Down Expand Up @@ -66,11 +56,6 @@ const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;

const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});

const int JitMemSize = 16 * 1024 * 1024;
#ifndef __SWITCH__
u8 JitMem[JitMemSize];
#endif

void Compiler::MovePC()
{
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
Expand Down Expand Up @@ -260,29 +245,12 @@ Compiler::Compiler(melonDS::NDS& nds) : Arm64Gen::ARM64XEmitter(), NDS(nds)
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
JitMemMainSize = JitMemSize;
#else
#ifdef _WIN32
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);

u64 pageSize = (u64)sysInfo.dwPageSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
#endif
u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned;

#if defined(_WIN32)
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
pageAligned = (u8*)mmap(NULL, 1024*1024*16, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT,-1, 0);
nds.JIT.JitEnableWrite();
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif

SetCodeBase(pageAligned, pageAligned);
JitMemMainSize = alignedSize;
ARMJIT_Global::Init();

CodeMemBase = ARMJIT_Global::AllocateCodeMem();

SetCodeBase(reinterpret_cast<u8*>(CodeMemBase), reinterpret_cast<u8*>(CodeMemBase));
JitMemMainSize = ARMJIT_Global::CodeMemorySliceSize;
#endif
SetCodePtr(0);

Expand Down Expand Up @@ -493,6 +461,9 @@ Compiler::~Compiler()
free(JitRWBase);
}
#endif

ARMJIT_Global::FreeCodeMem(CodeMemBase);
ARMJIT_Global::DeInit();
}

void Compiler::LoadCycles()
Expand Down
1 change: 1 addition & 0 deletions src/ARMJIT_A64/ARMJIT_Compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ class Compiler : public Arm64Gen::ARM64XEmitter
void* JitRWStart;
void* JitRXStart;
#endif
void* CodeMemBase;

void* ReadBanked, *WriteBanked;

Expand Down
118 changes: 118 additions & 0 deletions src/ARMJIT_Global.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#include "ARMJIT_Global.h"
#include "ARMJIT_Memory.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif

#include <stdio.h>
#include <stdint.h>

#include <mutex>

namespace melonDS
{

namespace ARMJIT_Global
{

std::mutex globalMutex;

#ifndef __APPLE__
static constexpr size_t NumCodeMemSlices = 4;
static constexpr size_t CodeMemoryAlignedSize = NumCodeMemSlices * CodeMemorySliceSize;

// I haven't heard of pages larger than 16 KB
u8 CodeMemory[CodeMemoryAlignedSize + 16*1024];

u32 AvailableCodeMemSlices = (1 << NumCodeMemSlices) - 1;

// Returns the first address inside CodeMemory that is a multiple of 16 KiB.
// CodeMemory is declared with 16 KiB of slack so that the aligned region still
// holds CodeMemoryAlignedSize bytes.
// NOTE(review): the alignment is hard-coded; a system whose page size exceeds
// 16 KiB would presumably need a larger value for the protection change in
// Init() to succeed - confirm.
u8* GetAlignedCodeMemoryStart()
{
    constexpr intptr_t alignment = 16*1024;

    const intptr_t rawStart = reinterpret_cast<intptr_t>(CodeMemory);
    // Round up to the next multiple of the alignment (no-op if already aligned).
    const intptr_t alignedStart = (rawStart + (alignment - 1)) & ~(alignment - 1);

    return reinterpret_cast<u8*>(alignedStart);
}
#endif

int RefCounter = 0;

void* AllocateCodeMem()
{
std::lock_guard guard(globalMutex);

#ifndef __APPLE__
if (AvailableCodeMemSlices)
{
int slice = __builtin_ctz(AvailableCodeMemSlices);
AvailableCodeMemSlices &= ~(1 << slice);
//printf("allocating slice %d\n", slice);
return &GetAlignedCodeMemoryStart()[slice * CodeMemorySliceSize];
}
#endif

// allocate
#ifdef _WIN32
return VirtualAlloc(nullptr, CodeMemorySliceSize, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
//printf("mmaping...\n");
return mmap(nullptr, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
}

void FreeCodeMem(void* codeMem)
{
std::lock_guard guard(globalMutex);

for (int i = 0; i < NumCodeMemSlices; i++)

Check failure on line 68 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / x86_64

use of undeclared identifier 'NumCodeMemSlices'

Check failure on line 68 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / arm64

use of undeclared identifier 'NumCodeMemSlices'
{
if (codeMem == &GetAlignedCodeMemoryStart()[CodeMemorySliceSize * i])

Check failure on line 70 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / x86_64

use of undeclared identifier 'GetAlignedCodeMemoryStart'

Check failure on line 70 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / arm64

use of undeclared identifier 'GetAlignedCodeMemoryStart'
{
//printf("freeing slice\n");
AvailableCodeMemSlices |= 1 << i;

Check failure on line 73 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / x86_64

use of undeclared identifier 'AvailableCodeMemSlices'

Check failure on line 73 in src/ARMJIT_Global.cpp

View workflow job for this annotation

GitHub Actions / arm64

use of undeclared identifier 'AvailableCodeMemSlices'
return;
}
}

#ifdef _WIN32
VirtualFree(codeMem, CodeMemorySliceSize, MEM_RELEASE|MEM_DECOMMIT);
#else
munmap(codeMem, CodeMemorySliceSize);
#endif
}

// Acquires one reference to the process-wide JIT state.
//
// Only the call that raises the reference count to 1 does any work: it makes
// the static code memory pool readable, writable and executable (nothing to do
// on Apple) and registers the ARMJIT_Memory fault handler. The reference count
// is protected by globalMutex.
void Init()
{
    std::lock_guard guard(globalMutex);

    if (++RefCounter != 1)
        return;

#ifdef _WIN32
    DWORD previousProtection;
    VirtualProtect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PAGE_EXECUTE_READWRITE, &previousProtection);
#elif defined(__APPLE__)
    // Apple always uses dynamic allocation
#else
    mprotect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif

    ARMJIT_Memory::RegisterFaultHandler();
}

// Drops one reference to the process-wide JIT state. When the reference count
// reaches zero the ARMJIT_Memory fault handler is unregistered. The reference
// count is protected by globalMutex.
void DeInit()
{
    std::lock_guard guard(globalMutex);

    if (--RefCounter == 0)
        ARMJIT_Memory::UnregisterFaultHandler();
}

}

}
44 changes: 44 additions & 0 deletions src/ARMJIT_Global.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
Copyright 2016-2024 melonDS team

This file is part of melonDS.

melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.

melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/

#ifndef ARMJIT_GLOBAL_H
#define ARMJIT_GLOBAL_H

#include "types.h"

#include <stdlib.h>

namespace melonDS
{

// Process-wide JIT resources shared by all emulator instances: the reference
// counted setup/teardown and the allocator for executable code memory.
namespace ARMJIT_Global
{

// Size in bytes (32 MiB) of each code memory region handed out by
// AllocateCodeMem().
static constexpr size_t CodeMemorySliceSize = 1024*1024*32;

// Takes a reference on the global JIT state; the first reference performs the
// one-time setup (including registering the fault handler).
void Init();
// Releases a reference taken with Init(); the last release unregisters the
// fault handler.
void DeInit();

// Returns a region of CodeMemorySliceSize bytes intended for generated code.
void* AllocateCodeMem();
// Gives back a region obtained from AllocateCodeMem().
void FreeCodeMem(void* codeMem);

}

}

#endif
4 changes: 2 additions & 2 deletions src/ARMJIT_Internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];

inline bool PageContainsCode(const AddressRange* range)
inline bool PageContainsCode(const AddressRange* range, u32 pageSize)
{
for (int i = 0; i < 8; i++)
for (int i = 0; i < pageSize / 512; i++)
{
if (range[i].Blocks.Length > 0)
return true;
Expand Down
Loading
Loading