diff --git a/FEXCore/Source/Interface/Core/CPUBackend.cpp b/FEXCore/Source/Interface/Core/CPUBackend.cpp index c1ec9794cf..0ff4e6918e 100644 --- a/FEXCore/Source/Interface/Core/CPUBackend.cpp +++ b/FEXCore/Source/Interface/Core/CPUBackend.cpp @@ -29,6 +29,12 @@ constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant: {0x0706'0504'FFFF'FFFFULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1110B {0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB {0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB_UPPER + {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_ONE + {0xD49A'784B'CD1B'8AFEULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_LOG2_10 + {0xB8AA'3B29'5C17'F0BCULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_LOG2_E + {0xC90F'DAA2'2168'C235ULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_PI + {0x9A20'9A84'FBCF'F799ULL, 0x0000'0000'0000'3FFDULL}, // NAMED_VECTOR_X87_LOG10_2 + {0xB172'17F7'D1CF'79ACULL, 0x0000'0000'0000'3FFEULL}, // NAMED_VECTOR_X87_LOG_2 }; constexpr static auto PSHUFLW_LUT { diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 25fd1673a4..4346c5f1ba 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -6647,13 +6647,13 @@ constexpr uint16_t PF_F2 = 3; {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST}, {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM}, // E6 = Invalid - {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0 - {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10) - {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e) - {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi - {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2) - {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2) - {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0 + {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const}, // 1.0 + {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const}, // log2l(10) + {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const}, // log2l(e) + {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const}, // pi + {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const}, // log10l(2) + {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const}, // log(2) + {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const}, // 0.0 // EF = Invalid {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 6ec1827883..1b610d5378 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -703,7 +703,7 @@ friend class FEXCore::IR::PassManager; OrderedNode *ReconstructX87StateFromFSW(OrderedNode *FSW); template void FLD(OpcodeArgs); - template + template void FLD_Const(OpcodeArgs); void FBLD(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index 2c50414940..9751766a89 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -196,7 +196,7 @@ void OpDispatchBuilder::FBSTP(OpcodeArgs) { SetX87Top(top); } -template +template void OpDispatchBuilder::FLD_Const(OpcodeArgs) { // Update TOP auto orig_top = GetX87Top(); @@ -204,28 +204,26 @@ void OpDispatchBuilder::FLD_Const(OpcodeArgs) { SetX87ValidTag(top, true); SetX87Top(top); - auto low = _Constant(Lower); - auto high = _Constant(Upper); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, constant); + // Write to ST[TOP] _StoreContextIndexed(data, top, 16, MMBaseOffset(), 16, FPRClass); } template -void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0 +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // 1.0 template -void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log2l(10) template -void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log2l(e) template -void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // pi template -void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log10l(2) template -void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log(2) template -void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0 +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // 0.0 void OpDispatchBuilder::FILD(OpcodeArgs) { // Update TOP @@ -958,10 +956,7 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs) { OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); if (Plus1) { - auto low = _Constant(0x8000'0000'0000'0000ULL); - auto high = _Constant(0b0'011'1111'1111'1111); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE); st0 = _F80Add(st0, data); } @@ -981,10 +976,7 @@ void OpDispatchBuilder::X87TAN(OpcodeArgs) { auto result = _F80TAN(a); - auto low = _Constant(0x8000'0000'0000'0000ULL); - auto high = _Constant(0b0'011'1111'1111'1111ULL); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE); // TODO: ACCURACY: should check source is in range –2^63 to +2^63 SetRFLAG(_Constant(0)); diff --git a/FEXCore/Source/Interface/IR/IRDumper.cpp b/FEXCore/Source/Interface/IR/IRDumper.cpp index b95e348328..3c6d3c538f 100644 --- a/FEXCore/Source/Interface/IR/IRDumper.cpp +++ b/FEXCore/Source/Interface/IR/IRDumper.cpp @@ -191,45 +191,44 @@ static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const } static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::NamedVectorConstant Arg) { - switch (Arg) { - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX: { - *out << "u16_incremental_index"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: { - *out << "u16_incremental_index_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT: { - *out << "addsubps_invert"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER: { - *out << "addsubps_invert_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT: { - *out << "addsubpd_invert"; - break; + *out << [Arg] { + // clang-format off + switch (Arg) { + case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX: + return "u16_incremental_index"; + case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: + return "u16_incremental_index_upper"; + case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT: + return "addsubps_invert"; + case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER: + return "addsubps_invert_upper"; + case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT: + return "addsubpd_invert"; + case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER: + return "addsubpd_invert_upper"; + case NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: + return "movmskps_shift"; + case NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE: + return "aeskeygenassist_swizzle"; + case NamedVectorConstant::NAMED_VECTOR_ZERO: + return "vectorzero"; + case NamedVectorConstant::NAMED_VECTOR_X87_ONE: + return "x87_1_0"; + case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10: + return "x87_log2_10"; + case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E: + return "x87_log2_e"; + case NamedVectorConstant::NAMED_VECTOR_X87_PI: + return "x87_pi"; + case NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2: + return "x87_log10_2"; + case NamedVectorConstant::NAMED_VECTOR_X87_LOG_2: + return "x87_log2"; + default: + return ""; } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER: { - *out << "addsubpd_invert_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: { - *out << "movmskps_shift"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE: { - *out << "aeskeygenassist_swizzle"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: { - *out << "vectorzero"; - break; - } - default: *out << ""; break; - } + // clang-format on + }(); } static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::OpSize Arg) { diff --git a/FEXCore/include/FEXCore/IR/IR.h b/FEXCore/include/FEXCore/IR/IR.h index a09c26885d..a6179b97af 100644 --- a/FEXCore/include/FEXCore/IR/IR.h +++ b/FEXCore/include/FEXCore/IR/IR.h @@ -59,6 +59,14 @@ enum NamedVectorConstant : uint8_t { NAMED_VECTOR_BLENDPS_1110B, NAMED_VECTOR_MOVMASKB, NAMED_VECTOR_MOVMASKB_UPPER, + + NAMED_VECTOR_X87_ONE, + NAMED_VECTOR_X87_LOG2_10, + NAMED_VECTOR_X87_LOG2_E, + NAMED_VECTOR_X87_PI, + NAMED_VECTOR_X87_LOG10_2, + NAMED_VECTOR_X87_LOG_2, + NAMED_VECTOR_CONST_POOL_MAX, // Beginning of named constants that don't have a constant pool backing. NAMED_VECTOR_ZERO = NAMED_VECTOR_CONST_POOL_MAX, diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json index 238782658e..f427a56faa 100644 --- a/unittests/InstructionCountCI/Crypto/H0F3A.json +++ b/unittests/InstructionCountCI/Crypto/H0F3A.json @@ -55,7 +55,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -68,7 +68,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/FEXOpt/libnss.json b/unittests/InstructionCountCI/FEXOpt/libnss.json index 9f114b9080..a70c7e8559 100644 --- a/unittests/InstructionCountCI/FEXOpt/libnss.json +++ b/unittests/InstructionCountCI/FEXOpt/libnss.json @@ -197,10 +197,10 @@ "ldr q3, [x11, #272]", "ldr q4, [x11]", "ldr q5, [x11, #16]", - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q6, [x0, #2832]", "tbl v2.16b, {v2.16b}, v6.16b", - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q7, [x0, #432]", "tbl v3.16b, {v3.16b}, v7.16b", "ldr q8, [x11, #32]", @@ -281,7 +281,7 @@ "mov v9.s[2], w25", "mov v9.s[1], w20", "mov v9.s[0], w22", - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q10, [x0, #224]", "tbl v4.16b, {v4.16b}, v10.16b", "mov w20, v9.s[1]", diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 232921e309..4084ceb643 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -1612,7 +1612,7 @@ "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x28, #2224]", + "ldr d3, [x28, #2272]", "cmlt v2.16b, v2.16b, #0", "and v2.16b, v2.16b, v3.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json index ce9df6ffec..24169c7eae 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json @@ -38,7 +38,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/VEX_map1.json b/unittests/InstructionCountCI/FlagM/VEX_map1.json index bdcff553c1..8fe066e27b 100644 --- a/unittests/InstructionCountCI/FlagM/VEX_map1.json +++ b/unittests/InstructionCountCI/FlagM/VEX_map1.json @@ -72,7 +72,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index cbcc041829..fc64135471 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -4502,7 +4502,7 @@ ] }, "fld1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe8 /5" ], @@ -4516,16 +4516,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2304]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2t": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe9 /5" ], @@ -4539,19 +4536,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8afe", - "movk x21, #0xcd1b, lsl #16", - "movk x21, #0x784b, lsl #32", - "movk x21, #0xd49a, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2320]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2e": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xea /5" ], @@ -4565,19 +4556,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf0bc", - "movk x21, #0x5c17, lsl #16", - "movk x21, #0x3b29, lsl #32", - "movk x21, #0xb8aa, lsl #48", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2336]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldpi": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xeb /5" ], @@ -4591,19 +4576,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xc235", - "movk x21, #0x2168, lsl #16", - "movk x21, #0xdaa2, lsl #32", - "movk x21, #0xc90f, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2352]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldlg2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xec /5" ], @@ -4617,19 +4596,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf799", - "movk x21, #0xfbcf, lsl #16", - "movk x21, #0x9a84, lsl #32", - "movk x21, #0x9a20, lsl #48", - "mov w22, #0x3ffd", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2368]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldln2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xed /5" ], @@ -4643,19 +4616,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x79ac", - "movk x21, #0xd1cf, lsl #16", - "movk x21, #0x17f7, lsl #32", - "movk x21, #0xb172, lsl #48", - "mov w22, #0x3ffe", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2384]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldz": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xee /5" ], @@ -4669,9 +4636,7 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov w21, #0x0", - "fmov d2, x21", - "mov v2.d[1], x21", + "movi v2.2d, #0x0", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] @@ -4771,7 +4736,7 @@ ] }, "fptan": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 46, "Comment": [ "0xd9 11b 0xf2 /6" ], @@ -4815,10 +4780,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "mov x21, #0x8000000000000000", - "mov w23, #0x3fff", - "fmov d3, x21", - "mov v3.d[1], x23", + "ldr q3, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", @@ -5082,7 +5044,7 @@ ] }, "fyl2xp1": { - "ExpectedInstructionCount": 79, + "ExpectedInstructionCount": 76, "Comment": [ "0xd9 11b 0xf9 /7" ], @@ -5100,10 +5062,7 @@ "ldr q2, [x0, #768]", "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "mov x20, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d4, x20", - "mov v4.d[1], x22", + "ldr q4, [x28, #2304]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json index 65009d764d..2fb8062559 100644 --- a/unittests/InstructionCountCI/H0F38.json +++ b/unittests/InstructionCountCI/H0F38.json @@ -624,7 +624,7 @@ "0x66 0x0f 0x38 0x41" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2000]", + "ldr q2, [x28, #2048]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json index d83ae4c04a..752bc3d179 100644 --- a/unittests/InstructionCountCI/H0F3A.json +++ b/unittests/InstructionCountCI/H0F3A.json @@ -315,7 +315,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2128]", + "ldr q2, [x28, #2176]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -325,7 +325,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2144]", + "ldr q2, [x28, #2192]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -344,7 +344,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", + "ldr q2, [x28, #2208]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -364,7 +364,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2176]", + "ldr q2, [x28, #2224]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -383,7 +383,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2192]", + "ldr q2, [x28, #2240]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -393,7 +393,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2208]", + "ldr q2, [x28, #2256]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -462,7 +462,7 @@ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1744]", + "ldr x0, [x28, #1792]", "ldr q2, [x0, #3440]", "tbx v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index bc6ada6e86..750f446f03 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -2868,7 +2868,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2288]", + "ldr x3, [x28, #2432]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2879,7 +2879,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2304]", + "ldr x3, [x28, #2448]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2940,7 +2940,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2296]", + "ldr x3, [x28, #2440]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2953,7 +2953,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2312]", + "ldr x3, [x28, #2456]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index b0bfd92f46..8978954d99 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -646,7 +646,7 @@ "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2096]", + "ldr q3, [x28, #2144]", "ushl v2.4s, v2.4s, v3.4s", "addv s2, v2.4s", "mov w4, v2.s[0]" @@ -657,7 +657,7 @@ "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2096]", + "ldr q3, [x28, #2144]", "ushl v2.4s, v2.4s, v3.4s", "addv s2, v2.4s", "mov w4, v2.s[0]" @@ -1041,7 +1041,7 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "ldr x0, [x28, #1696]", + "ldr x0, [x28, #1744]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", "str d2, [x28, #768]" @@ -1052,7 +1052,7 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x4]", - "ldr x0, [x28, #1696]", + "ldr x0, [x28, #1744]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", "str d2, [x28, #768]" @@ -3306,7 +3306,7 @@ "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q2, [x0, #16]", "tbl v16.16b, {v16.16b, v17.16b}, v2.16b" ] @@ -3315,7 +3315,7 @@ "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q2, [x0, #16]", "mov v0.16b, v17.16b", "mov v1.16b, v16.16b", @@ -3327,7 +3327,7 @@ "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q3, [x0, #16]", "mov v0.16b, v16.16b", "mov v1.16b, v2.16b", @@ -3425,7 +3425,7 @@ "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x28, #2224]", + "ldr d3, [x28, #2272]", "cmlt v2.16b, v2.16b, #0", "and v2.16b, v2.16b, v3.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json index 04f5f7f531..2d3b36f05d 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/Secondary_OpSize.json @@ -522,7 +522,7 @@ "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -536,7 +536,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q3, [x0, #16]", "tbl v16.16b, {v2.16b}, v3.16b" ] @@ -1014,7 +1014,7 @@ "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2064]", + "ldr q2, [x28, #2112]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.2d, v16.2d, v2.2d" ] @@ -1070,7 +1070,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json index bbee4a15cf..3f38dc715e 100644 --- a/unittests/InstructionCountCI/Secondary_REP.json +++ b/unittests/InstructionCountCI/Secondary_REP.json @@ -354,7 +354,7 @@ "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1704]", + "ldr x0, [x28, #1752]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json index 08665778b0..d1481c81e9 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/Secondary_REPNE.json @@ -296,7 +296,7 @@ "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1696]", + "ldr x0, [x28, #1744]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -452,7 +452,7 @@ "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2032]", + "ldr q2, [x28, #2080]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.4s, v16.4s, v2.4s" ] diff --git a/unittests/InstructionCountCI/VEX_map1.json b/unittests/InstructionCountCI/VEX_map1.json index 62351da972..395e02f157 100644 --- a/unittests/InstructionCountCI/VEX_map1.json +++ b/unittests/InstructionCountCI/VEX_map1.json @@ -2755,7 +2755,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2824,7 +2824,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2893,7 +2893,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1720]", + "ldr x0, [x28, #1768]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -4338,7 +4338,7 @@ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2064]", + "ldr q2, [x28, #2112]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.2d, v17.2d, v2.2d" ] @@ -4361,7 +4361,7 @@ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2032]", + "ldr q2, [x28, #2080]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.4s, v17.4s, v2.4s" ] @@ -4498,7 +4498,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/VEX_map2.json b/unittests/InstructionCountCI/VEX_map2.json index dd6c291801..b2ccc16d47 100644 --- a/unittests/InstructionCountCI/VEX_map2.json +++ b/unittests/InstructionCountCI/VEX_map2.json @@ -1575,7 +1575,7 @@ "Map 2 0b01 0x41 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2000]", + "ldr q2, [x28, #2048]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", diff --git a/unittests/InstructionCountCI/VEX_map3.json b/unittests/InstructionCountCI/VEX_map3.json index bcb21cc5cb..2a58bbef8b 100644 --- a/unittests/InstructionCountCI/VEX_map3.json +++ b/unittests/InstructionCountCI/VEX_map3.json @@ -4799,7 +4799,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -4812,7 +4812,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index dbf0ab8339..224a3be8ce 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -4501,7 +4501,7 @@ ] }, "fld1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe8 /5" ], @@ -4515,16 +4515,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2304]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2t": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe9 /5" ], @@ -4538,19 +4535,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8afe", - "movk x21, #0xcd1b, lsl #16", - "movk x21, #0x784b, lsl #32", - "movk x21, #0xd49a, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2320]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2e": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xea /5" ], @@ -4564,19 +4555,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf0bc", - "movk x21, #0x5c17, lsl #16", - "movk x21, #0x3b29, lsl #32", - "movk x21, #0xb8aa, lsl #48", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2336]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldpi": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xeb /5" ], @@ -4590,19 +4575,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xc235", - "movk x21, #0x2168, lsl #16", - "movk x21, #0xdaa2, lsl #32", - "movk x21, #0xc90f, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2352]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldlg2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xec /5" ], @@ -4616,19 +4595,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf799", - "movk x21, #0xfbcf, lsl #16", - "movk x21, #0x9a84, lsl #32", - "movk x21, #0x9a20, lsl #48", - "mov w22, #0x3ffd", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2368]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldln2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xed /5" ], @@ -4642,19 +4615,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x79ac", - "movk x21, #0xd1cf, lsl #16", - "movk x21, #0x17f7, lsl #32", - "movk x21, #0xb172, lsl #48", - "mov w22, #0x3ffe", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2384]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldz": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xee /5" ], @@ -4668,9 +4635,7 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov w21, #0x0", - "fmov d2, x21", - "mov v2.d[1], x21", + "movi v2.2d, #0x0", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] @@ -4770,7 +4735,7 @@ ] }, "fptan": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 46, "Comment": [ "0xd9 11b 0xf2 /6" ], @@ -4814,10 +4779,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "mov x21, #0x8000000000000000", - "mov w23, #0x3fff", - "fmov d3, x21", - "mov v3.d[1], x23", + "ldr q3, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", @@ -5081,7 +5043,7 @@ ] }, "fyl2xp1": { - "ExpectedInstructionCount": 79, + "ExpectedInstructionCount": 76, "Comment": [ "0xd9 11b 0xf9 /7" ], @@ -5099,10 +5061,7 @@ "ldr q2, [x0, #768]", "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "mov x20, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d4, x20", - "mov v4.d[1], x22", + "ldr q4, [x28, #2304]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]",