Skip to content

Commit

Permalink
Merge pull request #3568 from lioncash/const
Browse files Browse the repository at this point in the history
X87: Simplify constant loading for FLD family
  • Loading branch information
Sonicadvance1 authored Apr 11, 2024
2 parents 1ba678f + 4cb2432 commit 271700e
Show file tree
Hide file tree
Showing 23 changed files with 151 additions and 228 deletions.
6 changes: 6 additions & 0 deletions FEXCore/Source/Interface/Core/CPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant:
{0x0706'0504'FFFF'FFFFULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1110B
{0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB
{0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB_UPPER
{0x8000'0000'0000'0000ULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_ONE
{0xD49A'784B'CD1B'8AFEULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_LOG2_10
{0xB8AA'3B29'5C17'F0BCULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_LOG2_E
{0xC90F'DAA2'2168'C235ULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_PI
{0x9A20'9A84'FBCF'F799ULL, 0x0000'0000'0000'3FFDULL}, // NAMED_VECTOR_X87_LOG10_2
{0xB172'17F7'D1CF'79ACULL, 0x0000'0000'0000'3FFEULL}, // NAMED_VECTOR_X87_LOG_2
};

constexpr static auto PSHUFLW_LUT {
Expand Down
14 changes: 7 additions & 7 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6647,13 +6647,13 @@ constexpr uint16_t PF_F2 = 3;
{OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST},
{OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM},
// E6 = Invalid
{OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0
{OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10)
{OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e)
{OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi
{OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2)
{OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2)
{OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0
{OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_ONE>}, // 1.0
{OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10>}, // log2l(10)
{OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E>}, // log2l(e)
{OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_PI>}, // pi
{OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2>}, // log10l(2)
{OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG_2>}, // log(2)
{OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_ZERO>}, // 0.0

// EF = Invalid
{OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp<IR::OP_F80F2XM1>},
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ friend class FEXCore::IR::PassManager;
OrderedNode *ReconstructX87StateFromFSW(OrderedNode *FSW);
template<size_t width>
void FLD(OpcodeArgs);
// Handler for the constant-loading x87 opcodes; the template argument selects which
// constant is stored to the new top of the x87 stack (definition in OpcodeDispatcher/X87.cpp).
// NOTE(review): both template headers below come from a rendered diff without +/- markers;
// presumably the Lower/Upper header is the removed form -- confirm against the commit.
template<uint64_t Lower, uint32_t Upper>
template<NamedVectorConstant constant>
void FLD_Const(OpcodeArgs);

void FBLD(OpcodeArgs);
Expand Down
32 changes: 12 additions & 20 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,36 +196,34 @@ void OpDispatchBuilder::FBSTP(OpcodeArgs) {
SetX87Top(top);
}

// Shared handler for the constant-loading opcodes (the D9 E8..EE dispatch table entries).
// It moves the x87 TOP index down by one slot, marks that slot as valid, and stores a
// 16-byte constant into it.
//
// NOTE(review): this listing is a rendered diff with the +/- markers stripped, so both
// template headers and both ways of producing `data` are shown. Presumably the
// Lower/Upper form is the removed side and the NamedVectorConstant form is the added
// side -- confirm against the commit.
template<uint64_t Lower, uint32_t Upper>
template<NamedVectorConstant constant>
void OpDispatchBuilder::FLD_Const(OpcodeArgs) {
// Update TOP
// The new index is (TOP - 1) & 7, so it wraps from 0 back to 7.
auto orig_top = GetX87Top();
auto top = _And(OpSize::i32Bit, _Sub(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7));
SetX87ValidTag(top, true);
SetX87Top(top);

// Value-parameterised form: materialise both halves as GPR constants, cast the low half
// into a vector and insert the high half at index 1 (presumably 8-byte elements of a
// 16-byte register, going by the (16, 8, ...) arguments).
auto low = _Constant(Lower);
auto high = _Constant(Upper);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
// Named-constant form: obtain the 16-byte value through LoadAndCacheNamedVectorConstant
// (helper defined elsewhere; its caching behaviour is not visible here).
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, constant);

// Write to ST[TOP]
_StoreContextIndexed(data, top, 16, MMBaseOffset(), 16, FPRClass);
}

// Explicit instantiations of FLD_Const, one per constant referenced by the opcode
// dispatch table. The trailing comment on each line names the value that is loaded.
// NOTE(review): each `template` keyword is followed by two declarations because this is a
// rendered diff without +/- markers; presumably the raw-literal line is the removed form
// and the NamedVectorConstant line is the added form -- confirm against the commit.
template
void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_ONE>(OpcodeArgs); // 1.0
template
void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10>(OpcodeArgs); // log2l(10)
template
void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E>(OpcodeArgs); // log2l(e)
template
void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_PI>(OpcodeArgs); // pi
template
void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2>(OpcodeArgs); // log10l(2)
template
void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG_2>(OpcodeArgs); // log(2)
template
void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_ZERO>(OpcodeArgs); // 0.0

void OpDispatchBuilder::FILD(OpcodeArgs) {
// Update TOP
Expand Down Expand Up @@ -958,10 +956,7 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs) {
OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass);

if (Plus1) {
auto low = _Constant(0x8000'0000'0000'0000ULL);
auto high = _Constant(0b0'011'1111'1111'1111);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE);
st0 = _F80Add(st0, data);
}

Expand All @@ -981,10 +976,7 @@ void OpDispatchBuilder::X87TAN(OpcodeArgs) {

auto result = _F80TAN(a);

auto low = _Constant(0x8000'0000'0000'0000ULL);
auto high = _Constant(0b0'011'1111'1111'1111ULL);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE);

// TODO: ACCURACY: should check source is in range –2^63 to +2^63
SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(_Constant(0));
Expand Down
75 changes: 37 additions & 38 deletions FEXCore/Source/Interface/IR/IRDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,45 +191,44 @@ static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const
}

// Writes a short textual name for a NamedVectorConstant argument to `out`.
// Values without a dedicated case print "<Unknown Named Vector Constant>".
// The IR parameter is not used by this overload (it is marked [[maybe_unused]]).
//
// NOTE(review): this listing is a rendered diff with the +/- markers stripped, so two
// versions of the body are interleaved: a switch that streams inside each case, and a
// switch wrapped in an immediately-invoked lambda that returns the name so it is streamed
// once. Presumably the lambda form is the added side -- confirm against the commit.
static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::NamedVectorConstant Arg) {
switch (Arg) {
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX: {
*out << "u16_incremental_index";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: {
*out << "u16_incremental_index_upper";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT: {
*out << "addsubps_invert";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER: {
*out << "addsubps_invert_upper";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT: {
*out << "addsubpd_invert";
break;
// Lambda form: the switch only selects a string literal; the single `*out <<` streams it.
*out << [Arg] {
// clang-format off
switch (Arg) {
case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX:
return "u16_incremental_index";
case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER:
return "u16_incremental_index_upper";
case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT:
return "addsubps_invert";
case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER:
return "addsubps_invert_upper";
case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT:
return "addsubpd_invert";
case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER:
return "addsubpd_invert_upper";
case NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT:
return "movmskps_shift";
case NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE:
return "aeskeygenassist_swizzle";
case NamedVectorConstant::NAMED_VECTOR_ZERO:
return "vectorzero";
case NamedVectorConstant::NAMED_VECTOR_X87_ONE:
return "x87_1_0";
case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10:
return "x87_log2_10";
case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E:
return "x87_log2_e";
case NamedVectorConstant::NAMED_VECTOR_X87_PI:
return "x87_pi";
case NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2:
return "x87_log10_2";
// NOTE(review): "x87_log2" drops the underscore used by the enumerator (LOG_2) and by
// the sibling names above (x87_log2_10, x87_log10_2), so it reads like a base-2 log;
// confirm whether "x87_log_2" was intended before relying on this name in IR dumps.
case NamedVectorConstant::NAMED_VECTOR_X87_LOG_2:
return "x87_log2";
default:
return "<Unknown Named Vector Constant>";
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER: {
*out << "addsubpd_invert_upper";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: {
*out << "movmskps_shift";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE: {
*out << "aeskeygenassist_swizzle";
break;
}
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: {
*out << "vectorzero";
break;
}
default: *out << "<Unknown Named Vector Constant>"; break;
}
// clang-format on
}();
}

static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::OpSize Arg) {
Expand Down
8 changes: 8 additions & 0 deletions FEXCore/include/FEXCore/IR/IR.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ enum NamedVectorConstant : uint8_t {
NAMED_VECTOR_BLENDPS_1110B,
NAMED_VECTOR_MOVMASKB,
NAMED_VECTOR_MOVMASKB_UPPER,

NAMED_VECTOR_X87_ONE,
NAMED_VECTOR_X87_LOG2_10,
NAMED_VECTOR_X87_LOG2_E,
NAMED_VECTOR_X87_PI,
NAMED_VECTOR_X87_LOG10_2,
NAMED_VECTOR_X87_LOG_2,

NAMED_VECTOR_CONST_POOL_MAX,
// Beginning of named constants that don't have a constant pool backing.
NAMED_VECTOR_ZERO = NAMED_VECTOR_CONST_POOL_MAX,
Expand Down
4 changes: 2 additions & 2 deletions unittests/InstructionCountCI/Crypto/H0F3A.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"0x66 0x0f 0x3a 0xdf"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2112]",
"ldr q2, [x28, #2160]",
"movi v3.2d, #0x0",
"mov v16.16b, v17.16b",
"unimplemented (Unimplemented)",
Expand All @@ -68,7 +68,7 @@
"0x66 0x0f 0x3a 0xdf"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2112]",
"ldr q2, [x28, #2160]",
"movi v3.2d, #0x0",
"mov v16.16b, v17.16b",
"unimplemented (Unimplemented)",
Expand Down
6 changes: 3 additions & 3 deletions unittests/InstructionCountCI/FEXOpt/libnss.json
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,10 @@
"ldr q3, [x11, #272]",
"ldr q4, [x11]",
"ldr q5, [x11, #16]",
"ldr x0, [x28, #1712]",
"ldr x0, [x28, #1760]",
"ldr q6, [x0, #2832]",
"tbl v2.16b, {v2.16b}, v6.16b",
"ldr x0, [x28, #1712]",
"ldr x0, [x28, #1760]",
"ldr q7, [x0, #432]",
"tbl v3.16b, {v3.16b}, v7.16b",
"ldr q8, [x11, #32]",
Expand Down Expand Up @@ -281,7 +281,7 @@
"mov v9.s[2], w25",
"mov v9.s[1], w20",
"mov v9.s[0], w22",
"ldr x0, [x28, #1712]",
"ldr x0, [x28, #1760]",
"ldr q10, [x0, #224]",
"tbl v4.16b, {v4.16b}, v10.16b",
"mov w20, v9.s[1]",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/FlagM/Secondary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,7 @@
"Comment": "0x0f 0xd7",
"ExpectedArm64ASM": [
"ldr d2, [x28, #768]",
"ldr d3, [x28, #2224]",
"ldr d3, [x28, #2272]",
"cmlt v2.16b, v2.16b, #0",
"and v2.16b, v2.16b, v3.16b",
"addp v2.16b, v2.16b, v2.16b",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"ExpectedInstructionCount": 7,
"Comment": "0x66 0x0f 0xd7",
"ExpectedArm64ASM": [
"ldr q2, [x28, #2224]",
"ldr q2, [x28, #2272]",
"cmlt v3.16b, v16.16b, #0",
"and v2.16b, v3.16b, v2.16b",
"addp v2.16b, v2.16b, v2.16b",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/FlagM/VEX_map1.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"Map 1 0b01 0xd7 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2224]",
"ldr q2, [x28, #2272]",
"cmlt v3.16b, v16.16b, #0",
"and v2.16b, v3.16b, v2.16b",
"addp v2.16b, v2.16b, v2.16b",
Expand Down
Loading

0 comments on commit 271700e

Please sign in to comment.