Skip to content

Commit

Permalink
Merge pull request #3257 from alyssarosenzweig/helper/derive-op
Browse files Browse the repository at this point in the history
Add helper for deriving ops by opcode
  • Loading branch information
alyssarosenzweig authored Nov 8, 2023
2 parents fa8c35f + 73958b9 commit 3f1f7fa
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 56 deletions.
18 changes: 4 additions & 14 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,7 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
}
else {
Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = AllowUpperGarbage || Size >= 4});
auto ALUOp = _Add(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Dest, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _Add(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Dest, Src));

Result = ALUOp;

Expand Down Expand Up @@ -5398,15 +5396,10 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor
OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
DestMem = AppendSegmentOffset(DestMem, Op->Flags);

auto FetchOp = _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem);
// Overwrite our atomic op type
FetchOp.first->Header.Op = AtomicFetchOp;
DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem));
Dest = FetchOp;

auto ALUOp = _Add(OpSize, Dest, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = ALUIROp;

DeriveOp(ALUOp, ALUIROp, _Add(OpSize, Dest, Src));
Result = ALUOp;
}
else {
Expand All @@ -5425,10 +5418,7 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor

Result = _Constant(0);
} else {
auto ALUOp = _Add(OpSize, Dest, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = ALUIROp;

DeriveOp(ALUOp, ALUIROp, _Add(OpSize, Dest, Src));
Result = ALUOp;
}

Expand Down
10 changes: 10 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1329,6 +1329,16 @@ friend class FEXCore::IR::PassManager;
}
}

// Helper to derive Dest by a given builder-using Expression with the opcode
// replaced with NewOp. Useful for generic building code. Not safe in general.
// but does the right handling of ImplicitFlagClobber at least and must be
// used instead of raw Op mutation.
#define DeriveOp(Dest, NewOp, Expr) \
if (ImplicitFlagClobber(NewOp)) \
SaveNZCV(); \
auto Dest = (Expr); \
Dest.first->Header.Op = (NewOp)

// Named constant cache for the current block.
// Different arrays for sizes 1,2,4,8,16,32.
OrderedNode *CachedNamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MAX][6]{};
Expand Down
41 changes: 11 additions & 30 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,7 @@ void OpDispatchBuilder::VectorALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSi
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);

auto ALUOp = _VAdd(Size, ElementSize, Dest, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Dest, Src));

StoreResult(FPRClass, Op, ALUOp, -1);
}
Expand Down Expand Up @@ -371,9 +369,7 @@ void OpDispatchBuilder::AVXVectorALUOpImpl(OpcodeArgs, IROps IROp, size_t Elemen
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);

auto ALUOp = _VAdd(Size, ElementSize, Src1, Src2);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src1, Src2));

StoreResult(FPRClass, Op, ALUOp, -1);
}
Expand Down Expand Up @@ -506,9 +502,7 @@ void OpDispatchBuilder::VectorALUROpImpl(OpcodeArgs, IROps IROp, size_t ElementS
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);

auto ALUOp = _VAdd(Size, ElementSize, Src, Dest);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src, Dest));

StoreResult(FPRClass, Op, ALUOp, -1);
}
Expand Down Expand Up @@ -540,10 +534,8 @@ OrderedNode* OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IR
{.AllowUpperGarbage = true});

// If OpSize == ElementSize then it only does the lower scalar op
auto ALUOp = _VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;

DeriveOp(ALUOp, IROp,
_VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits));
return ALUOp;
}

Expand Down Expand Up @@ -626,10 +618,7 @@ OrderedNode* OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IRO
{.AllowUpperGarbage = true});

// If OpSize == ElementSize then it only does the lower scalar op
auto ALUOp = _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;

DeriveOp(ALUOp, IROp, _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits));
return ALUOp;
}

Expand Down Expand Up @@ -940,9 +929,7 @@ void OpDispatchBuilder::VectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Element

OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);

auto ALUOp = _VFSqrt(OpSize, ElementSize, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src));

StoreResult(FPRClass, Op, ALUOp, -1);
}
Expand Down Expand Up @@ -979,9 +966,7 @@ void OpDispatchBuilder::AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Elem

OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);

auto ALUOp = _VFSqrt(OpSize, ElementSize, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src));

// NOTE: We don't need to clear the upper lanes here, since the
// IR ops make use of 128-bit AdvSimd for 128-bit cases,
Expand Down Expand Up @@ -1017,9 +1002,7 @@ void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_

OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);

auto ALUOp = _VFSqrt(ElementSize, ElementSize, Src);
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
DeriveOp(ALUOp, IROp, _VFSqrt(ElementSize, ElementSize, Src));

// Duplicate the lower bits
auto Result = _VDupElement(Size, ElementSize, ALUOp, 0);
Expand Down Expand Up @@ -1746,8 +1729,7 @@ void OpDispatchBuilder::VHADDPOp(OpcodeArgs) {
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);

auto Res = _VFAddP(SrcSize, ElementSize, Src1, Src2);
Res.first->Header.Op = IROp;
DeriveOp(Res, IROp, _VFAddP(SrcSize, ElementSize, Src1, Src2));

OrderedNode *Dest = Res;
if (Is256Bit) {
Expand Down Expand Up @@ -2439,8 +2421,7 @@ void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) {
OrderedNode *Vector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags);
OrderedNode *ShiftVector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], DstSize, Op->Flags);

auto Shift = _VUShr(DstSize, SrcSize, Vector, ShiftVector, true);
Shift.first->Header.Op = IROp;
DeriveOp(Shift, IROp, _VUShr(DstSize, SrcSize, Vector, ShiftVector, true));

StoreResult(FPRClass, Op, Shift, -1);
}
Expand Down
8 changes: 2 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,9 +856,7 @@ void OpDispatchBuilder::X87UnaryOp(OpcodeArgs) {
auto top = GetX87Top();
auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass);

auto result = _F80Round(a);
// Overwrite the op
result.first->Header.Op = IROp;
DeriveOp(result, IROp, _F80Round(a));

if constexpr (IROp == IR::OP_F80SIN ||
IROp == IR::OP_F80COS) {
Expand Down Expand Up @@ -889,9 +887,7 @@ void OpDispatchBuilder::X87BinaryOp(OpcodeArgs) {
auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass);
st1 = _LoadContextIndexed(st1, 16, MMBaseOffset(), 16, FPRClass);

auto result = _F80Add(a, st1);
// Overwrite the op
result.first->Header.Op = IROp;
DeriveOp(result, IROp, _F80Add(a, st1));

if constexpr (IROp == IR::OP_F80FPREM ||
IROp == IR::OP_F80FPREM1) {
Expand Down
8 changes: 2 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,9 +767,7 @@ void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs) {
auto top = GetX87Top();
auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass);

auto result = _F64SIN(a);
// Overwrite the op
result.first->Header.Op = IROp;
DeriveOp(result, IROp, _F64SIN(a));

if constexpr (IROp == IR::OP_F64SIN ||
IROp == IR::OP_F64COS) {
Expand Down Expand Up @@ -799,9 +797,7 @@ void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs) {
auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass);
st1 = _LoadContextIndexed(st1, 8, MMBaseOffset(), 16, FPRClass);

auto result = _F64ATAN(a, st1);
// Overwrite the op
result.first->Header.Op = IROp;
DeriveOp(result, IROp, _F64ATAN(a, st1));

if constexpr (IROp == IR::OP_F64FPREM ||
IROp == IR::OP_F64FPREM1) {
Expand Down

0 comments on commit 3f1f7fa

Please sign in to comment.