Skip to content

Commit

Permalink
Merge pull request #4172 from alyssarosenzweig/jit/cf
Browse files Browse the repository at this point in the history
OpcodeDispatcher: drop PossiblySetNZCV
  • Loading branch information
Sonicadvance1 authored Nov 26, 2024
2 parents 0463512 + b6f34fa commit f41b9bc
Show file tree
Hide file tree
Showing 20 changed files with 1,049 additions and 1,025 deletions.
31 changes: 5 additions & 26 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) {
if (CTX->Config.ABILocalFlags) {
_InvalidateFlags(~0UL); // all flags
InvalidatePF_AF();
// Deferred flags are invalidated now
InvalidateDeferredFlags();
}

Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
Expand Down Expand Up @@ -557,8 +555,6 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) {
if (CTX->Config.ABILocalFlags) {
_InvalidateFlags(~0UL); // all flags
InvalidatePF_AF();
// Deferred flags are invalidated now
InvalidateDeferredFlags();
}

auto ConstantPC = GetRelocatedPC(Op);
Expand Down Expand Up @@ -2029,8 +2025,6 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
Ref Res = _Lshr(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

InvalidateDeferredFlags();

// Constant folded version of the above, with fused shifts.
if (Const > 1) {
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
Expand Down Expand Up @@ -2254,8 +2248,6 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
Ref Res = _Lshl(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

InvalidateDeferredFlags();

// Res |= (Src << (Size - Shift + 1));
if (Const > 1) {
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
Expand Down Expand Up @@ -2384,9 +2376,6 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
const uint32_t Size = GetDstBitSize(Op);
const uint32_t Mask = Size - 1;

// Deferred flags are invalidated now
InvalidateDeferredFlags();

if (IsNonconstant) {
// Because we mask explicitly with And/Bfe/Sbfe after, we can allow garbage here.
Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
Expand Down Expand Up @@ -2416,7 +2405,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
Value = _Lshr(IR::SizeToOpSize(LshrSize), Value, BitSelect);
}

SetCFDirect(Value, ConstantShift, true);
SetCFDirect_InvalidateNZV(Value, ConstantShift, Value);
}

switch (Action) {
Expand Down Expand Up @@ -2449,7 +2438,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
Value = Dest;
}

SetCFInverted(Value, ConstantShift, true);
SetCFInverted_InvalidateNZV(Value, ConstantShift, true);
StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
break;
}
Expand Down Expand Up @@ -2855,14 +2844,11 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
}

void OpDispatchBuilder::AAAOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AF = CalculateAFForDecimal(A);

// CF = AF, OF/SF/ZF/PF undefined
ZeroNZCV();
SetCFDirect(AF);
SetCFDirect_InvalidateNZV(AF);
SetAFAndFixup(AF);
CalculateDeferredFlags();

Expand All @@ -2875,14 +2861,11 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) {
}

void OpDispatchBuilder::AASOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AF = CalculateAFForDecimal(A);

// CF = AF, OF/SF/ZF/PF undefined
ZeroNZCV();
SetCFDirect(AF);
SetCFDirect_InvalidateNZV(AF);
SetAFAndFixup(AF);
CalculateDeferredFlags();

Expand All @@ -2895,8 +2878,6 @@ void OpDispatchBuilder::AASOp(OpcodeArgs) {
}

void OpDispatchBuilder::AAMOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto AL = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);
auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF);
auto UDivOp = _UDiv(OpSize::i64Bit, AL, Imm8);
Expand All @@ -2910,8 +2891,6 @@ void OpDispatchBuilder::AAMOp(OpcodeArgs) {
}

void OpDispatchBuilder::AADOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AH = _Lshr(OpSize::i32Bit, A, _Constant(8));
auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF);
Expand Down Expand Up @@ -4876,7 +4855,7 @@ void OpDispatchBuilder::RDRANDOp(OpcodeArgs) {
SetCFInverted(CF_inv);
} else {
// Accelerated path. Invalid is 0 or 1, so set NZCV with a single rmif.
HandleNZCVWrite(1u << 29 /* C */);
HandleNZCVWrite();
_RmifNZCV(CF_inv, (64 - 1) /* rotate bit 0 into bit 1 = C */, 0xf);
CFInverted = true;
}
Expand Down
93 changes: 47 additions & 46 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ class OpDispatchBuilder final : public IREmitter {
// If we loaded flags but didn't change them, invalidate the cached copy and move on.
// Changes get stored out by CalculateDeferredFlags.
CachedNZCV = nullptr;
PossiblySetNZCVBits = ~0U;
CFInverted = CFInvertedABI;
FlushRegisterCache();

Expand Down Expand Up @@ -1329,7 +1328,6 @@ class OpDispatchBuilder final : public IREmitter {

Ref CachedNZCV {};
bool NZCVDirty {};
uint32_t PossiblySetNZCVBits {};

// Set if the host carry is inverted from the guest carry. This is set after
// subtraction, because arm64 and x86 have inverted borrow flags, but clear
Expand Down Expand Up @@ -1579,31 +1577,26 @@ class OpDispatchBuilder final : public IREmitter {
return IR::SizeToOpSize(GetSrcSize(Op));
}

// Set flag tracking to prepare for an operation that directly writes NZCV. If
// some bits are known to be zeroed, the PossiblySetNZCVBits mask can be
// passed. Otherwise, it defaults to assuming all bits may be set after
// (this is conservative).
void HandleNZCVWrite(uint32_t _PossiblySetNZCVBits = ~0) {
InvalidateDeferredFlags();
// Set flag tracking to prepare for an operation that directly writes NZCV.
void HandleNZCVWrite() {
CachedNZCV = nullptr;
PossiblySetNZCVBits = _PossiblySetNZCVBits;
NZCVDirty = false;
}

// Set flag tracking to prepare for a read-modify-write operation on NZCV.
void HandleNZCV_RMW(uint32_t _PossiblySetNZCVBits = ~0) {
void HandleNZCV_RMW() {
CalculateDeferredFlags();

if (NZCVDirty && CachedNZCV) {
_StoreNZCV(CachedNZCV);
}

HandleNZCVWrite(_PossiblySetNZCVBits);
HandleNZCVWrite();
}

// Special case of the above where we are known to zero C/V
void HandleNZ00Write() {
HandleNZCVWrite((1u << 31) | (1u << 30));
HandleNZCVWrite();

// Host carry will be implicitly zeroed, and we want guest carry zeroed as
// well. So do not invert.
Expand All @@ -1625,7 +1618,6 @@ class OpDispatchBuilder final : public IREmitter {

void ZeroNZCV() {
CachedNZCV = _Constant(0);
PossiblySetNZCVBits = 0;
NZCVDirty = true;
}

Expand All @@ -1643,7 +1635,6 @@ class OpDispatchBuilder final : public IREmitter {
_SubNZCV(SrcSize, Res, _Constant(0));
}

PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
CFInverted = true;
} else {
_TestNZ(SrcSize, Res, Res);
Expand All @@ -1662,13 +1653,8 @@ class OpDispatchBuilder final : public IREmitter {
void InsertNZCV(unsigned BitOffset, Ref Value, signed FlagOffset, bool MustMask) {
signed Bit = IndexNZCV(BitOffset);

// If NZCV is not dirty, we always want to use rmif, it's 1 instruction to
// implement this. But if NZCV is dirty, it might still be cheaper to copy
// the GPR flags to NZCV and rmif. This is a heuristic for cases where we
// expect that 2 instruction sequence to be a win (versus something like
// bfe+mov+bfi+mov which can happen with our RA..). It's not totally
// conservative but it's pretty good in practice.
bool PreferRmif = !NZCVDirty || FlagOffset || MustMask || (PossiblySetNZCVBits & (1u << Bit));
// Heuristic to choose rmif vs msr.
bool PreferRmif = !NZCVDirty || FlagOffset || MustMask;

if (CTX->HostFeatures.SupportsFlagM && PreferRmif) {
// Update NZCV
Expand All @@ -1689,16 +1675,8 @@ class OpDispatchBuilder final : public IREmitter {
Value = _Bfe(OpSize::i64Bit, 1, FlagOffset, Value);
}

if (PossiblySetNZCVBits == 0) {
SetNZCV(_Lshl(OpSize::i64Bit, Value, _Constant(Bit)));
} else if ((PossiblySetNZCVBits & (1u << Bit)) == 0) {
SetNZCV(_Orlshl(OpSize::i32Bit, GetNZCV(), Value, Bit));
} else {
SetNZCV(_Bfi(OpSize::i32Bit, 1, Bit, GetNZCV(), Value));
}
SetNZCV(_Bfi(OpSize::i32Bit, 1, Bit, GetNZCV(), Value));
}

PossiblySetNZCVBits |= (1u << Bit);
}

// If we don't care about N/C/V and just need Z, we can test with a simple
Expand Down Expand Up @@ -1737,7 +1715,6 @@ class OpDispatchBuilder final : public IREmitter {

void CarryInvert() {
CFInverted ^= true;
PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
}

template<unsigned BitOffset>
Expand All @@ -1751,6 +1728,44 @@ class OpDispatchBuilder final : public IREmitter {
CFInverted = true;
}

// Write the guest carry flag from a 0/1 value while respecting the carry
// invert. The flag is produced with a flag-setting subtraction from zero:
//
//   0 - x = 0 + (~x) + 1
//
//   x = 0: 0 + (~0) + 1 = 0x100000000, so hardware C is set.
//   x = 1: 0 + (~1) + 1 = 0x0ffffffff, so hardware C is not set.
//
// Hardware C therefore ends up as the inverse of x, which is why CFInverted
// is set afterwards. The subtraction writes NZCV as a whole; presumably that
// is what the _InvalidateNZV suffix refers to, so callers should not rely on
// N/Z/V after this call.
//
// Value:       source of the carry bit.
// ValueOffset: bit position of the carry bit inside Value.
// MustMask:    request that Value be reduced to a single bit even when
//              ValueOffset is zero.
void SetCFDirect_InvalidateNZV(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
  const bool NeedsExtract = MustMask || ValueOffset != 0;
  if (NeedsExtract) {
    // Isolate the one interesting bit so the subtraction only ever sees 0 or 1.
    Value = _Bfe(OpSize::i64Bit, 1, ValueOffset, Value);
  }

  // Reset NZCV tracking before the direct write below.
  HandleNZCVWrite();

  auto Zero = _Constant(0);
  _SubNZCV(OpSize::i32Bit, Zero, Value);

  // Host carry now holds the inverse of the guest carry.
  CFInverted = true;
}

// Write the carry flag from a 0/1 value that is already the inverse of the
// guest carry, so hardware C takes the value as-is and CFInverted is set.
//
// With FlagM this defers to SetCFInverted, which becomes a single rmif.
// Without FlagM the flag is produced by the flag-setting subtraction
//
//   x - 1 = x + (~1) + 1
//
//   x = 0: hardware C is not set.
//   x = 1: hardware C is set.
//
// The subtraction path writes NZCV as a whole; presumably that is what the
// _InvalidateNZV suffix refers to, so callers should not rely on N/Z/V after
// this call.
//
// Value:       source of the (inverted) carry bit.
// ValueOffset: bit position of that bit inside Value.
// MustMask:    request that Value be reduced to a single bit even when
//              ValueOffset is zero.
void SetCFInverted_InvalidateNZV(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
  if (CTX->HostFeatures.SupportsFlagM) {
    // This turns into a single rmif.
    SetCFInverted(Value, ValueOffset, MustMask);
    return;
  }

  // No FlagM available: compute the carry arithmetically instead.
  const bool NeedsExtract = MustMask || ValueOffset != 0;
  if (NeedsExtract) {
    // Isolate the one interesting bit so the subtraction only ever sees 0 or 1.
    Value = _Bfe(OpSize::i64Bit, 1, ValueOffset, Value);
  }

  // Reset NZCV tracking before the direct write below.
  HandleNZCVWrite();
  _SubNZCV(OpSize::i32Bit, Value, _InlineConstant(1));

  // Host carry now holds the inverse of the guest carry.
  CFInverted = true;
}

void SetCFInverted(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
SetRFLAG(Value, X86State::RFLAG_CF_RAW_LOC, ValueOffset, MustMask);
CFInverted = true;
Expand Down Expand Up @@ -2010,9 +2025,7 @@ class OpDispatchBuilder final : public IREmitter {
Invert ^= CFInverted;
}

if (!(PossiblySetNZCVBits & (1u << IndexNZCV(BitOffset)))) {
return _Constant(Invert ? 1 : 0);
} else if (NZCVDirty) {
if (NZCVDirty) {
auto Value = _Bfe(OpSize::i32Bit, 1, IndexNZCV(BitOffset), GetNZCV());

if (Invert) {
Expand Down Expand Up @@ -2119,7 +2132,6 @@ class OpDispatchBuilder final : public IREmitter {
//
// Our AXFlag emulation on FlagM2-less systems needs V_inv passed.
_AXFlag(CTX->HostFeatures.SupportsFlagM2 ? Invalid() : V_inv);
PossiblySetNZCVBits = ~0;
CFInverted = true;
}

Expand All @@ -2133,7 +2145,6 @@ class OpDispatchBuilder final : public IREmitter {

// Convert to x86 flags, saves us from or'ing after.
_AXFlag(Invalid());
PossiblySetNZCVBits = ~0;
CFInverted = true;

// Copy the values.
Expand Down Expand Up @@ -2241,14 +2252,6 @@ class OpDispatchBuilder final : public IREmitter {
*/
void CalculateDeferredFlags();

/**
* @brief Invalidates NZCV. Mostly vestigial.
*/
void InvalidateDeferredFlags() {
// No NZCV bits will be set, they are all invalid.
PossiblySetNZCVBits = 0;
}

void ZeroShiftResult(FEXCore::X86Tables::DecodedOp Op) {
// In the case of zero-rotate, we need to store the destination still to deal with 32-bit semantics.
const auto Size = OpSizeFromSrc(Op);
Expand Down Expand Up @@ -2277,7 +2280,6 @@ class OpDispatchBuilder final : public IREmitter {
}

// Otherwise, prepare to branch.
uint32_t OldSetNZCVBits = PossiblySetNZCVBits;
auto Zero = _Constant(0);

// If the shift is zero, do not touch the flags.
Expand Down Expand Up @@ -2312,7 +2314,6 @@ class OpDispatchBuilder final : public IREmitter {

SetCurrentCodeBlock(EndBlock);
StartNewBlock();
PossiblySetNZCVBits |= OldSetNZCVBits;
}

/**
Expand Down
5 changes: 0 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2226,8 +2226,6 @@ void OpDispatchBuilder::AVX128_VPERM2(OpcodeArgs) {

template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
InvalidateDeferredFlags();

const auto Size = GetSrcSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

Expand Down Expand Up @@ -2296,9 +2294,6 @@ void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
}

void OpDispatchBuilder::AVX128_PTest(OpcodeArgs) {
// Invalidate deferred flags early
InvalidateDeferredFlags();

const auto Size = GetSrcSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

Expand Down
4 changes: 0 additions & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,9 @@ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, Ref Src) {
size_t NumFlags = FlagOffsets.size();
if (Lower8) {
// Calculate flags early.
// Could use InvalidateDeferredFlags() if we had masked invalidation.
// This is only a partial overwrite of flags since OF isn't stored here.
CalculateDeferredFlags();
NumFlags = 5;
} else {
// We are overwriting all RFLAGS. Invalidate the deferred flag state.
InvalidateDeferredFlags();
}

// PF and CF are both stored inverted, so hoist the invert.
Expand Down
Loading

0 comments on commit f41b9bc

Please sign in to comment.