Skip to content

Commit

Permalink
Merge pull request #1338 from lioncash/bic
Browse files Browse the repository at this point in the history
IR: Add handling for ANDN operations
  • Loading branch information
Sonicadvance1 authored Nov 3, 2021
2 parents 43454ab + 49dae08 commit 34b2f93
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 13 deletions.
21 changes: 21 additions & 0 deletions External/FEXCore/Source/Interface/Core/Interpreter/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,26 @@ DEF_OP(And) {
}
}

// Interpreter handler for the Andn IR op: evaluates Src1 & ~Src2 at the
// width given by the op's size.
DEF_OP(Andn) {
  auto Op = IROp->C<IR::IROp_Andn>();
  const uint8_t OpSize = IROp->Size;

  // NOTE(review): Src1/Src2 are not referenced again in this function, so
  // DO_OP (defined outside this view) presumably picks them up by name.
  // Keep these identifiers unchanged.
  void *Src1 = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Data->SSAData, Op->Header.Args[1]);

  // `~` and `&` promote narrow integers to int, so both the inverted operand
  // and the final result are cast back to the operand type.
  constexpr auto AndNot = [](auto Lhs, auto Rhs) {
    using ValueType = decltype(Lhs);
    const auto InvertedRhs = static_cast<ValueType>(~Rhs);
    return static_cast<ValueType>(Lhs & InvertedRhs);
  };

  switch (OpSize) {
    DO_OP(1, uint8_t, AndNot)
    DO_OP(2, uint16_t, AndNot)
    DO_OP(4, uint32_t, AndNot)
    DO_OP(8, uint64_t, AndNot)
    default:
      LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize);
      break;
  }
}

DEF_OP(Xor) {
auto Op = IROp->C<IR::IROp_Xor>();
uint8_t OpSize = IROp->Size;
Expand Down Expand Up @@ -975,6 +995,7 @@ void InterpreterOps::RegisterALUHandlers() {
REGISTER_OP(UMULH, UMulH);
REGISTER_OP(OR, Or);
REGISTER_OP(AND, And);
REGISTER_OP(ANDN, Andn);
REGISTER_OP(XOR, Xor);
REGISTER_OP(LSHL, Lshl);
REGISTER_OP(LSHR, Lshr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ namespace FEXCore::CPU {
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(And);
DEF_OP(Andn);
DEF_OP(Xor);
DEF_OP(Lshl);
DEF_OP(Lshr);
Expand Down
14 changes: 14 additions & 0 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,19 @@ DEF_OP(And) {
}
}

// Arm64 JIT handler for the Andn IR op: emits a single BIC so that
// Dst = Args[0] & ~Args[1].
DEF_OP(Andn) {
  auto Op = IROp->C<IR::IROp_Andn>();
  const auto& Value = Op->Header.Args[0];
  const auto& Mask = Op->Header.Args[1];

  // Destination and first operand are needed on both paths.
  const auto Dst = GRS(Node);
  const auto Src = GRS(Value.ID());

  uint64_t Imm{};
  if (!IsInlineConstant(Mask, &Imm)) {
    // Second operand lives in a register.
    bic(Dst, Src, GRS(Mask.ID()));
  } else {
    // Second operand was folded into the op as an inline constant.
    bic(Dst, Src, Imm);
  }
}

DEF_OP(Xor) {
auto Op = IROp->C<IR::IROp_Xor>();
uint64_t Const;
Expand Down Expand Up @@ -1079,6 +1092,7 @@ void Arm64JITCore::RegisterALUHandlers() {
REGISTER_OP(UMULH, UMulH);
REGISTER_OP(OR, Or);
REGISTER_OP(AND, And);
REGISTER_OP(ANDN, Andn);
REGISTER_OP(XOR, Xor);
REGISTER_OP(LSHL, Lshl);
REGISTER_OP(LSHR, Lshr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(And);
DEF_OP(Andn);
DEF_OP(Xor);
DEF_OP(Lshl);
DEF_OP(Lshr);
Expand Down
29 changes: 25 additions & 4 deletions External/FEXCore/Source/Interface/Core/JIT/x86_64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ tags: backend|x86-64
#include <xbyak/xbyak.h>

namespace FEXCore::CPU {

// Register-selection helpers. These expand in place, so GRS/GRD require an
// `IROp` variable and GRCMP requires an `Op` variable to be in scope at the
// point of use.
//
// GRS: source register for Node; RA_32 variant when IROp->Size <= 4, RA_64 otherwise.
#define GRS(Node) (IROp->Size <= 4 ? GetSrc<RA_32>(Node) : GetSrc<RA_64>(Node))
// GRD: destination register for Node; RA_32 variant when IROp->Size <= 4, RA_64 otherwise.
#define GRD(Node) (IROp->Size <= 4 ? GetDst<RA_32>(Node) : GetDst<RA_64>(Node))
// GRCMP: source register chosen by Op->CompareSize. Note the test is `== 4`
// (not `<= 4`): any other compare size selects the RA_64 variant.
#define GRCMP(Node) (Op->CompareSize == 4 ? GetSrc<RA_32>(Node) : GetSrc<RA_64>(Node))

#define DEF_OP(x) void X86JITCore::Op_##x(FEXCore::IR::IROp_Header *IROp, uint32_t Node)
DEF_OP(TruncElementPair) {
auto Op = IROp->C<IR::IROp_TruncElementPair>();
Expand Down Expand Up @@ -417,6 +422,25 @@ DEF_OP(And) {
mov(Dst, rax);
}

// x86-64 JIT handler for the Andn IR op: Dst = Args[0] & ~Args[1].
DEF_OP(Andn) {
  auto Op = IROp->C<IR::IROp_Andn>();
  const auto& Value = Op->Header.Args[0];
  const auto& Mask = Op->Header.Args[1];
  auto Dst = GRD(Node);

  uint64_t Imm{};
  if (!IsInlineConstant(Mask, &Imm)) {
    // Register operand: invert a copy of the mask in rax (sized to match the
    // operation), AND in the value, and only then write the destination.
    const Xbyak::Reg Scratch = IROp->Size <= 4 ? Xbyak::Reg{rax.cvt32()} : Xbyak::Reg{rax};
    mov(Scratch, GRS(Mask.ID()));
    not_(Scratch);
    and_(Scratch, GRS(Value.ID()));
    mov(Dst, Scratch);
  } else {
    // Inline constant: the inversion is done at JIT time and folded into the
    // AND immediate.
    // NOTE(review): assumes the inline-constant pass only admits values whose
    // complement is encodable as an x86 immediate — TODO confirm.
    mov(Dst, GRS(Value.ID()));
    and_(Dst, ~Imm);
  }
}

DEF_OP(Xor) {
auto Op = IROp->C<IR::IROp_Xor>();
auto Dst = GetDst<RA_64>(Node);
Expand Down Expand Up @@ -1048,10 +1072,6 @@ DEF_OP(Sbfe) {
}
}

#define GRS(Node) (IROp->Size <= 4 ? GetSrc<RA_32>(Node) : GetSrc<RA_64>(Node))
#define GRD(Node) (IROp->Size <= 4 ? GetDst<RA_32>(Node) : GetDst<RA_64>(Node))
#define GRCMP(Node) (Op->CompareSize == 4 ? GetSrc<RA_32>(Node) : GetSrc<RA_64>(Node))

DEF_OP(Select) {
auto Op = IROp->C<IR::IROp_Select>();
auto Dst = GRD(Node);
Expand Down Expand Up @@ -1221,6 +1241,7 @@ void X86JITCore::RegisterALUHandlers() {
REGISTER_OP(UMULH, UMulH);
REGISTER_OP(OR, Or);
REGISTER_OP(AND, And);
REGISTER_OP(ANDN, Andn);
REGISTER_OP(XOR, Xor);
REGISTER_OP(LSHL, Lshl);
REGISTER_OP(LSHR, Lshr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class X86JITCore final : public CPUBackend, public Xbyak::CodeGenerator {
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(And);
DEF_OP(Andn);
DEF_OP(Xor);
DEF_OP(Lshl);
DEF_OP(Lshr);
Expand Down
10 changes: 4 additions & 6 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2134,8 +2134,7 @@ void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) {
auto* Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
auto* Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);

// TODO: This can be replaced with a BIC IR op once it's implemented.
auto Dest = _And(_Not(Src1), Src2);
auto Dest = _Andn(Src2, Src1);

StoreResult(GPRClass, Op, Dest, -1);
GenerateFlags_Logical(Op, Dest, Src1, Src2);
Expand Down Expand Up @@ -2633,8 +2632,7 @@ void OpDispatchBuilder::BTROp(OpcodeArgs) {
Result = _Lshr(Dest, BitSelect);

OrderedNode *BitMask = _Lshl(_Constant(1), BitSelect);
BitMask = _Not(BitMask);
Dest = _And(Dest, BitMask);
Dest = _Andn(Dest, BitMask);
StoreResult(GPRClass, Op, Dest, -1);
}
else {
Expand All @@ -2655,10 +2653,10 @@ void OpDispatchBuilder::BTROp(OpcodeArgs) {
// Now add the addresses together and load the memory
OrderedNode *MemoryLocation = _Add(Dest, Src);
OrderedNode *BitMask = _Lshl(_Constant(1), BitSelect);
BitMask = _Not(BitMask);

if (DestIsLockedMem(Op)) {
HandledLock = true;
BitMask = _Not(BitMask);
// XXX: Technically this can optimize to an AArch64 ldclralb
// We don't currently support this IR op though
Result = _AtomicFetchAnd(MemoryLocation, BitMask, 1);
Expand All @@ -2670,7 +2668,7 @@ void OpDispatchBuilder::BTROp(OpcodeArgs) {

// Now shift in to the correct bit location
Result = _Lshr(Value, BitSelect);
Value = _And(Value, BitMask);
Value = _Andn(Value, BitMask);
_StoreMemAutoTSO(GPRClass, 1, MemoryLocation, Value, 1);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ void OpDispatchBuilder::SetX87TopTag(OrderedNode *Value, uint32_t Tag) {
OrderedNode *Mask = _Constant(0b11);
auto TopOffset = _Lshl(Value, _Constant(1));
Mask = _Lshl(Mask, TopOffset);
// XXX: This Neg can be removed if we support BIC
Mask = _Not(Mask);
OrderedNode *NewFTW = _And(FTW, Mask);
OrderedNode *NewFTW = _Andn(FTW, Mask);
if (Tag != 0) {
auto TagVal = _Lshl(_Constant(Tag), TopOffset);
NewFTW = _Or(NewFTW, TagVal);
Expand Down
9 changes: 9 additions & 0 deletions External/FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,15 @@
"SSAArgs": "2"
},

"Andn": {
"Desc": ["Integer binary AND NOT. Performs the equivalent of Src1 & ~Src2"],
"OpClass": "ALU",
"HasDest": true,
"DestClass": "GPR",
"DestSize": "std::max<uint8_t>(4, GetOpSize(ssa0))",
"SSAArgs": "2"
},

"Xor": {
"Desc": ["Integer binary exclusive or"
],
Expand Down

0 comments on commit 34b2f93

Please sign in to comment.