From 1465df874b6c20643a0b3c75c7f36ff0d5c10505 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 24 Nov 2024 00:34:31 -0800 Subject: [PATCH 1/3] OpcodeDispatcher: Fixes 80-bit loads Ensures reads don't go past the end of the page boundary. SVE masked loads can make this more effective but `VLoadVectorMasked` isn't set up to be efficient for this case yet. --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 8 ++++++++ FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index fab35fb0b1..abb3197a91 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4328,6 +4328,14 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T } if ((IsOperandMem(Operand, true) && LoadData) || ForceLoad) { + if (OpSize == OpSize::f80Bit) { + Ref MemSrc = LoadEffectiveAddress(A, true); + + // For X87 extended doubles, split the load. + auto Res = _LoadMem(Class, OpSize::i64Bit, MemSrc, Align == OpSize::iInvalid ? OpSize : Align); + return _VLoadVectorElement(OpSize::i128Bit, OpSize::i16Bit, Res, 4, _Add(OpSize::i64Bit, MemSrc, _InlineConstant(8))); + } + return _LoadMemAutoTSO(Class, OpSize, A, Align == OpSize::iInvalid ? OpSize : Align); } else { return LoadEffectiveAddress(A, false, AllowUpperGarbage); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index b165dd1dfb..e325eb1d84 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -63,7 +63,7 @@ void OpDispatchBuilder::SetX87Top(Ref Value) { void OpDispatchBuilder::FLD(OpcodeArgs, IR::OpSize Width) { const auto ReadWidth = (Width == OpSize::f80Bit) ? 
OpSize::i128Bit : Width; - Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ReadWidth, Op->Flags); + Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], Width, Op->Flags); Ref ConvertedData = Data; // Convert to 80bit float if (Width == OpSize::i32Bit || Width == OpSize::i64Bit) { @@ -79,7 +79,7 @@ void OpDispatchBuilder::FLDFromStack(OpcodeArgs) { void OpDispatchBuilder::FBLD(OpcodeArgs) { // Read from memory - Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSize::i128Bit, Op->Flags); + Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSize::f80Bit, Op->Flags); Ref ConvertedData = _F80BCDLoad(Data); _PushStack(ConvertedData, Data, OpSize::i128Bit, true); } From c84503c271c8dc6b6b79198d1539f6db197d6ed5 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 24 Nov 2024 00:05:27 -0800 Subject: [PATCH 2/3] unittests/ASM: Adds x87 loadstore tests for edge of pages These tests ensure that FEX's x87 80-bit loadstores don't read/write past the end of the page. --- unittests/ASM/X87/LoadAtBoundary.asm | 32 ++++++++++++++++++++++++++ unittests/ASM/X87/StoreAtBoundary.asm | 33 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 unittests/ASM/X87/LoadAtBoundary.asm create mode 100644 unittests/ASM/X87/StoreAtBoundary.asm diff --git a/unittests/ASM/X87/LoadAtBoundary.asm b/unittests/ASM/X87/LoadAtBoundary.asm new file mode 100644 index 0000000000..55fb09d104 --- /dev/null +++ b/unittests/ASM/X87/LoadAtBoundary.asm @@ -0,0 +1,32 @@ +%ifdef CONFIG +{ + "RegData": { + "MM7": ["0x5354555657584142", "0x0000000000005152"], + "MM6": ["0xe94de5eae34fc1c0", "0x0000000000004039"] + }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +finit ; enters x87 state + +mov rax, 0x100000000 +mov rbx, 0x4142434445464748 +mov rcx, 0x5152535455565758 +mov rdx, (0x100000000 + 0x1000 - 16) + +mov [rdx], rbx +mov [rdx + 8], rcx + +mov rdx, 0x100000000 + 0x1000 + +; Do an 80-bit load at the edge of a page. 
+; Ensuring tword loads don't extend past the end of a page. +fld tword [rdx - 10] + +; Do an 80-bit BCD load at the edge of a page. +fbld [rdx - 10] + +hlt diff --git a/unittests/ASM/X87/StoreAtBoundary.asm b/unittests/ASM/X87/StoreAtBoundary.asm new file mode 100644 index 0000000000..d78710936f --- /dev/null +++ b/unittests/ASM/X87/StoreAtBoundary.asm @@ -0,0 +1,33 @@ +%ifdef CONFIG +{ + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +finit ; enters x87 state + +mov rax, 0x100000000 +mov rbx, 0x4142434445464748 +mov rcx, 0x5152535455565758 +mov rdx, (0x100000000 + 0x1000 - 16) + +mov [rdx], rbx +mov [rdx + 8], rcx + +mov rdx, 0x100000000 + 0x1000 + +; Load the data into an x87 register for storing. +fld tword [rdx - 16] +fld tword [rdx - 16] + +; Do an 80-bit store at the edge of a page. +; Ensuring tword stores don't extend past the end of a page. +; If storing past the end of the page, then an unhandled SIGSEGV will occur. +fstp tword [rdx - 10] + +; Do an 80-bit BCD store at the edge of a page. 
+fbstp [rdx - 10] + +hlt From f0d25d413bebea01ed6032688bca72f2f5d62d02 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 24 Nov 2024 00:38:54 -0800 Subject: [PATCH 3/3] InstCountCI: Update --- unittests/InstructionCountCI/FlagM/x87.json | 12 ++++++++---- unittests/InstructionCountCI/x87.json | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index 8d4974144b..c1aef6b98b 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -6869,12 +6869,14 @@ ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -16690,12 +16692,14 @@ ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 40, + "ExpectedInstructionCount": 42, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", "mrs x0, nzcv", "str w0, [x28, #1000]", "stp x4, x7, [x28, #280]", diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index 665ec21db4..ec354fbc47 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -6868,12 +6868,14 @@ ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", + "ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -16532,12 +16534,14 @@ ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 40, + "ExpectedInstructionCount": 42, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", + 
"ldr d2, [x4]", + "add x20, x4, #0x8 (8)", + "ld1 {v2.h}[4], [x20]", "mrs x0, nzcv", "str w0, [x28, #1000]", "stp x4, x7, [x28, #280]",