Skip to content

Commit

Permalink
Merge pull request #3984 from Sonicadvance1/atomic_tso_subchecks
Browse files Browse the repository at this point in the history
FEXCore: Splits up atomic enablement checks
  • Loading branch information
Sonicadvance1 authored Aug 20, 2024
2 parents 92c951c + 9c8438f commit 689b461
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 28 deletions.
37 changes: 21 additions & 16 deletions FEXCore/Source/Interface/Context/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ class ContextImpl final : public FEXCore::Context::Context {
FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
FEX_CONFIG_OPT(TSOAutoMigration, TSOAUTOMIGRATION);
FEX_CONFIG_OPT(VectorTSOEnabled, VECTORTSOENABLED);
FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED);
FEX_CONFIG_OPT(ABILocalFlags, ABILOCALFLAGS);
FEX_CONFIG_OPT(AOTIRCapture, AOTIRCAPTURE);
FEX_CONFIG_OPT(AOTIRGenerate, AOTIRGENERATE);
Expand Down Expand Up @@ -344,25 +346,19 @@ class ContextImpl final : public FEXCore::Context::Context {
return AtomicTSOEmulationEnabled;
}

void SetHardwareTSOSupport(bool HardwareTSOSupported) override {
SupportsHardwareTSO = HardwareTSOSupported;
UpdateAtomicTSOEmulationConfig();
// Returns whether atomic-based TSO emulation is currently enabled for vector operations.
// This simply reports the cached VectorAtomicTSOEmulationEnabled flag. Where that flag is
// assigned, it is forced to false when hardware TSO is supported, and otherwise requires
// both Config.TSOEnabled and Config.VectorTSOEnabled to be set.
bool IsVectorAtomicTSOEnabled() const {
return VectorAtomicTSOEmulationEnabled;
}

// Returns if Software TSO emulation is required.
// NOTE: This doesn't necessarily return if Atomic-based TSO is currently enabled.
// This will still return true if on a single thread and TSO is currently disabled.
//
// This is to ensure that if early initialization checks CPU features and TSO /could/ be enabled, that
// we return consistent results.
//
// To check if Atomic TSO is currently enabled in the JIT, use `IsAtomicTSOEnabled` instead.
bool SoftwareTSORequired() const {
if (SupportsHardwareTSO) {
return false;
}
// Returns whether atomic-based TSO emulation is currently enabled for memcpy operations.
// This simply reports the cached MemcpyAtomicTSOEmulationEnabled flag. Where that flag is
// assigned, it is forced to false when hardware TSO is supported, and otherwise requires
// both Config.TSOEnabled and Config.MemcpySetTSOEnabled to be set.
bool IsMemcpyAtomicTSOEnabled() const {
return MemcpyAtomicTSOEmulationEnabled;
}

return Config.TSOEnabled;
// Stores the supplied hardware-TSO-support flag in SupportsHardwareTSO and then calls
// UpdateAtomicTSOEmulationConfig(), presumably so the atomic TSO emulation flags that
// depend on it are recomputed (confirm against that function's definition).
void SetHardwareTSOSupport(bool HardwareTSOSupported) override {
SupportsHardwareTSO = HardwareTSOSupported;
UpdateAtomicTSOEmulationConfig();
}

void EnableExitOnHLT() override {
Expand All @@ -378,9 +374,15 @@ class ContextImpl final : public FEXCore::Context::Context {
if (SupportsHardwareTSO) {
// If the hardware supports TSO then we don't need to emulate it through atomics.
AtomicTSOEmulationEnabled = false;
VectorAtomicTSOEmulationEnabled = false;
MemcpyAtomicTSOEmulationEnabled = false;
} else {
// Atomic TSO emulation only enabled if the config option is enabled.
AtomicTSOEmulationEnabled = (IsMemoryShared || !Config.TSOAutoMigration) && Config.TSOEnabled;
// Atomic vector TSO emulation only enabled if TSO emulation is enabled and also vector TSO is enabled.
VectorAtomicTSOEmulationEnabled = (IsMemoryShared || !Config.TSOAutoMigration) && Config.TSOEnabled && Config.VectorTSOEnabled;
// Atomic memcpy TSO emulation only enabled if TSO emulation is enabled and also memcpy TSO is enabled.
MemcpyAtomicTSOEmulationEnabled = (IsMemoryShared || !Config.TSOAutoMigration) && Config.TSOEnabled && Config.MemcpySetTSOEnabled;
}
}

Expand All @@ -403,6 +405,9 @@ class ContextImpl final : public FEXCore::Context::Context {
bool IsMemoryShared = false;
bool SupportsHardwareTSO = false;
bool AtomicTSOEmulationEnabled = true;
bool VectorAtomicTSOEmulationEnabled = false;
bool MemcpyAtomicTSOEmulationEnabled = false;

bool ExitOnHLT = false;
FEX_CONFIG_OPT(AppFilename, APP_FILENAME);

Expand Down
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/CPUID.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,8 +608,8 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const {
// Disable Enhanced REP MOVS when TSO is enabled.
// vcruntime140 memmove will use `rep movsb` in this case which completely destroys perf in Hades(appId 1145360)
// This is due to LRCPC performance on Cortex being abysmal.
// Only enable EnhancedREPMOVS if SoftwareTSO isn't required OR if MemcpySetTSO is not enabled.
const uint32_t SupportsEnhancedREPMOVS = CTX->SoftwareTSORequired() == false || MemcpySetTSOEnabled() == false;
// Only enable EnhancedREPMOVS if atomic memcpy TSO emulation isn't enabled.
const uint32_t SupportsEnhancedREPMOVS = CTX->IsMemcpyAtomicTSOEnabled() == false;
const uint32_t SupportsVPCLMULQDQ = CTX->HostFeatures.SupportsPMULL_128Bit && SupportsAVX();

// Number of subfunctions
Expand Down
1 change: 0 additions & 1 deletion FEXCore/Source/Interface/Core/CPUID.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ class CPUIDEmu final {
uint32_t Cores {};
FEX_CONFIG_OPT(HideHypervisorBit, HIDEHYPERVISORBIT);
FEX_CONFIG_OPT(SmallTSCScale, SMALLTSCSCALE);
FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED);

// XFEATURE_ENABLED_MASK
// Mask that configures what features are enabled on the CPU.
Expand Down
2 changes: 0 additions & 2 deletions FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

private:
FEX_CONFIG_OPT(ParanoidTSO, PARANOIDTSO);
FEX_CONFIG_OPT(VectorTSOEnabled, VECTORTSOENABLED);
FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED);

const bool HostSupportsSVE128 {};
const bool HostSupportsSVE256 {};
Expand Down
14 changes: 7 additions & 7 deletions FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ DEF_OP(LoadMemTSO) {
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break;
}
if (VectorTSOEnabled()) {
if (CTX->IsVectorAtomicTSOEnabled()) {
// Half-barrier.
dmb(ARMEmitter::BarrierScope::ISHLD);
}
Expand Down Expand Up @@ -1191,7 +1191,7 @@ DEF_OP(VLoadVectorElement) {
}

// Emit a half-barrier if TSO is enabled.
if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) {
if (CTX->IsVectorAtomicTSOEnabled()) {
dmb(ARMEmitter::BarrierScope::ISHLD);
}
}
Expand All @@ -1210,7 +1210,7 @@ DEF_OP(VStoreVectorElement) {
"size");

// Emit a half-barrier if TSO is enabled.
if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) {
if (CTX->IsVectorAtomicTSOEnabled()) {
dmb(ARMEmitter::BarrierScope::ISH);
}

Expand Down Expand Up @@ -1272,7 +1272,7 @@ DEF_OP(VBroadcastFromMem) {
}

// Emit a half-barrier if TSO is enabled.
if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) {
if (CTX->IsVectorAtomicTSOEnabled()) {
dmb(ARMEmitter::BarrierScope::ISHLD);
}
}
Expand Down Expand Up @@ -1492,7 +1492,7 @@ DEF_OP(StoreMemTSO) {
}
}
} else {
if (VectorTSOEnabled()) {
if (CTX->IsVectorAtomicTSOEnabled()) {
// Half-Barrier.
dmb(ARMEmitter::BarrierScope::ISH);
}
Expand Down Expand Up @@ -1524,7 +1524,7 @@ DEF_OP(MemSet) {
// that the value is zero, we can optimize any operation larger than 8-bit down to 8-bit to use the MOPS implementation.
const auto Op = IROp->C<IR::IROp_MemSet>();

const bool IsAtomic = Op->IsAtomic && MemcpySetTSOEnabled();
const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled();
const int32_t Size = Op->Size;
const auto MemReg = GetReg(Op->Addr.ID());
const auto Value = GetReg(Op->Value.ID());
Expand Down Expand Up @@ -1714,7 +1714,7 @@ DEF_OP(MemCpy) {
// Assuming non-atomicity and non-faulting behaviour, this can accelerate this implementation.
const auto Op = IROp->C<IR::IROp_MemCpy>();

const bool IsAtomic = Op->IsAtomic && MemcpySetTSOEnabled();
const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled();
const int32_t Size = Op->Size;
const auto MemRegDest = GetReg(Op->Dest.ID());
const auto MemRegSrc = GetReg(Op->Src.ID());
Expand Down

0 comments on commit 689b461

Please sign in to comment.