From 9b81a8389405e52e6bc48717ecd8bc9527acbfde Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 7 Nov 2023 10:02:28 -0400 Subject: [PATCH 1/2] OpcodeDispatcher: Add DeriveOp helper The "create op with wrong opcode, then change the opcode" pattern is REALLY dangerous. This does not address that. But when we start doing NZCV trickery, it will get /more/ dangerous, and so it's time to add a helper and make the convenient thing the safe(r) thing. This helper correctly saves NZCV /before/ the instruction like the real builders would. It also provides a spot for future safety asserts if someone is motivated. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 244e28016b..c83e81af14 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1329,6 +1329,16 @@ friend class FEXCore::IR::PassManager; } } + // Helper to derive Dest by a given builder-using Expression with the opcode + // replaced with NewOp. Useful for generic building code. Not safe in general, + // but does the right handling of ImplicitFlagClobber at least and must be + // used instead of raw Op mutation. +#define DeriveOp(Dest, NewOp, Expr) \ if (ImplicitFlagClobber(NewOp)) \ SaveNZCV(); \ auto Dest = (Expr); \ Dest.first->Header.Op = (NewOp) + // Named constant cache for the current block. // Different arrays for sizes 1,2,4,8,16,32. OrderedNode *CachedNamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MAX][6]{}; From 73958b9163b0c8f3ad7244030f535a6139195a24 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 7 Nov 2023 10:11:25 -0400 Subject: [PATCH 2/2] OpcodeDispatcher: Use DeriveOp Replace every instance of the Op overwrite pattern, and ban that anti-pattern from the codebase in the future. 
This will prevent piles of NZCV related regressions. Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 18 ++------ .../Core/OpcodeDispatcher/Vector.cpp | 41 +++++-------------- .../Interface/Core/OpcodeDispatcher/X87.cpp | 8 +--- .../Core/OpcodeDispatcher/X87F64.cpp | 8 +--- 4 files changed, 19 insertions(+), 56 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index a107cece22..90f20e0f64 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -404,9 +404,7 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) { } else { Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = AllowUpperGarbage || Size >= 4}); - auto ALUOp = _Add(IR::SizeToOpSize(std::max(4u, Size)), Dest, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _Add(IR::SizeToOpSize(std::max(4u, Size)), Dest, Src)); Result = ALUOp; @@ -5398,15 +5396,10 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); - auto FetchOp = _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem); - // Overwrite our atomic op type - FetchOp.first->Header.Op = AtomicFetchOp; + DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem)); Dest = FetchOp; - auto ALUOp = _Add(OpSize, Dest, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = ALUIROp; - + DeriveOp(ALUOp, ALUIROp, _Add(OpSize, Dest, Src)); Result = ALUOp; } else { @@ -5425,10 +5418,7 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor Result = _Constant(0); } else { - auto ALUOp = _Add(OpSize, Dest, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = ALUIROp; - + 
DeriveOp(ALUOp, ALUIROp, _Add(OpSize, Dest, Src)); Result = ALUOp; } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index bccf10fd31..82feacf718 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -225,9 +225,7 @@ void OpDispatchBuilder::VectorALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSi OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - auto ALUOp = _VAdd(Size, ElementSize, Dest, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Dest, Src)); StoreResult(FPRClass, Op, ALUOp, -1); } @@ -371,9 +369,7 @@ void OpDispatchBuilder::AVXVectorALUOpImpl(OpcodeArgs, IROps IROp, size_t Elemen OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - auto ALUOp = _VAdd(Size, ElementSize, Src1, Src2); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src1, Src2)); StoreResult(FPRClass, Op, ALUOp, -1); } @@ -506,9 +502,7 @@ void OpDispatchBuilder::VectorALUROpImpl(OpcodeArgs, IROps IROp, size_t ElementS OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - auto ALUOp = _VAdd(Size, ElementSize, Src, Dest); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src, Dest)); StoreResult(FPRClass, Op, ALUOp, -1); } @@ -540,10 +534,8 @@ OrderedNode* OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IR {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op - auto ALUOp = _VFAddScalarInsert(IR::SizeToOpSize(DstSize), 
ElementSize, Src1, Src2, ZeroUpperBits); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; - + DeriveOp(ALUOp, IROp, + _VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } @@ -626,10 +618,7 @@ OrderedNode* OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IRO {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op - auto ALUOp = _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; - + DeriveOp(ALUOp, IROp, _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } @@ -940,9 +929,7 @@ void OpDispatchBuilder::VectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Element OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - auto ALUOp = _VFSqrt(OpSize, ElementSize, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); StoreResult(FPRClass, Op, ALUOp, -1); } @@ -979,9 +966,7 @@ void OpDispatchBuilder::AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Elem OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - auto ALUOp = _VFSqrt(OpSize, ElementSize, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); // NOTE: We don't need to clear the upper lanes here, since the // IR ops make use of 128-bit AdvSimd for 128-bit cases, @@ -1017,9 +1002,7 @@ void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_ OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - auto ALUOp = _VFSqrt(ElementSize, ElementSize, Src); - // Overwrite our IR's op type - ALUOp.first->Header.Op = IROp; + DeriveOp(ALUOp, IROp, _VFSqrt(ElementSize, ElementSize, Src)); // Duplicate the 
lower bits auto Result = _VDupElement(Size, ElementSize, ALUOp, 0); @@ -1746,8 +1729,7 @@ void OpDispatchBuilder::VHADDPOp(OpcodeArgs) { OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - auto Res = _VFAddP(SrcSize, ElementSize, Src1, Src2); - Res.first->Header.Op = IROp; + DeriveOp(Res, IROp, _VFAddP(SrcSize, ElementSize, Src1, Src2)); OrderedNode *Dest = Res; if (Is256Bit) { @@ -2439,8 +2421,7 @@ void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) { OrderedNode *Vector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); OrderedNode *ShiftVector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], DstSize, Op->Flags); - auto Shift = _VUShr(DstSize, SrcSize, Vector, ShiftVector, true); - Shift.first->Header.Op = IROp; + DeriveOp(Shift, IROp, _VUShr(DstSize, SrcSize, Vector, ShiftVector, true)); StoreResult(FPRClass, Op, Shift, -1); } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index f1e0cbc8ce..f469e4b82f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -856,9 +856,7 @@ void OpDispatchBuilder::X87UnaryOp(OpcodeArgs) { auto top = GetX87Top(); auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); - auto result = _F80Round(a); - // Overwrite the op - result.first->Header.Op = IROp; + DeriveOp(result, IROp, _F80Round(a)); if constexpr (IROp == IR::OP_F80SIN || IROp == IR::OP_F80COS) { @@ -889,9 +887,7 @@ void OpDispatchBuilder::X87BinaryOp(OpcodeArgs) { auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); st1 = _LoadContextIndexed(st1, 16, MMBaseOffset(), 16, FPRClass); - auto result = _F80Add(a, st1); - // Overwrite the op - result.first->Header.Op = IROp; + DeriveOp(result, IROp, _F80Add(a, st1)); if constexpr (IROp == IR::OP_F80FPREM || IROp == 
IR::OP_F80FPREM1) { diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp index 8b484e9f71..e119fe27fd 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp @@ -767,9 +767,7 @@ void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs) { auto top = GetX87Top(); auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); - auto result = _F64SIN(a); - // Overwrite the op - result.first->Header.Op = IROp; + DeriveOp(result, IROp, _F64SIN(a)); if constexpr (IROp == IR::OP_F64SIN || IROp == IR::OP_F64COS) { @@ -799,9 +797,7 @@ void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs) { auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); st1 = _LoadContextIndexed(st1, 8, MMBaseOffset(), 16, FPRClass); - auto result = _F64ATAN(a, st1); - // Overwrite the op - result.first->Header.Op = IROp; + DeriveOp(result, IROp, _F64ATAN(a, st1)); if constexpr (IROp == IR::OP_F64FPREM || IROp == IR::OP_F64FPREM1) {