Skip to content

Commit

Permalink
Merge pull request #3775 from Sonicadvance1/avx_bugfixes
Browse files Browse the repository at this point in the history
AVX128: Some quick bugfixes
  • Loading branch information
Sonicadvance1 authored Jun 28, 2024
2 parents 4e5da49 + 98d62a7 commit 739ac0f
Show file tree
Hide file tree
Showing 12 changed files with 212 additions and 121 deletions.
28 changes: 22 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ void OpDispatchBuilder::AVX128_VectorShiftImmImpl(OpcodeArgs, size_t ElementSize
Result.Low = Low;

if (!Is128Bit) {
DeriveOp(High, IROp, _VUShrI(OpSize::i128Bit, ElementSize, Src.Low, ShiftConstant));
DeriveOp(High, IROp, _VUShrI(OpSize::i128Bit, ElementSize, Src.High, ShiftConstant));
Result.High = High;
}
}
Expand Down Expand Up @@ -1980,13 +1980,29 @@ void OpDispatchBuilder::AVX128_VPMADDWD(OpcodeArgs) {

/// Emits the IR for an immediate-selector vector blend, operating on the
/// source as one or two 128-bit halves.
///
/// The low half is always blended with the selector as encoded. For 256-bit
/// operations the high half is blended separately, using the selector bits
/// that correspond to the upper elements (see SelectorShift below). For
/// 128-bit operations the high half of the result is the zero vector.
///
/// @tparam ElementSize Size in bytes of each blended element; compared against
///         OpSize::i16Bit / i32Bit / i64Bit to pick the selector shift.
template<size_t ElementSize>
void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs) {
  const auto SrcSize = GetSrcSize(Op);
  const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
  const uint64_t Selector = Op->Src[2].Literal();

  ///< High Selector shift depends on element size:
  /// i16Bit: Reuses same bits, no shift
  /// i32Bit: Shift by 4
  /// i64Bit: Shift by 2
  /// i.e. the shift equals the number of elements in one 128-bit half, except
  /// for 16-bit elements where the 8 selector bits already cover a full half
  /// and are applied to both halves.
  constexpr uint64_t SelectorShift = ElementSize == OpSize::i64Bit ? 2 : ElementSize == OpSize::i32Bit ? 4 : 0;

  // The final argument requests the high half only for 256-bit operations
  // (presumably a "needs high" flag; confirm against AVX128_LoadSource_WithOpSize).
  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
  auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);

  RefPair Result {};
  auto ZeroRegister = LoadZeroVector(OpSize::i128Bit);
  Result.Low = VBLENDOpImpl(OpSize::i128Bit, ElementSize, Src1.Low, Src2.Low, ZeroRegister, Selector);

  if (Is128Bit) {
    Result.High = ZeroRegister;
  } else {
    // Each half needs its own slice of the selector, so the halves are blended
    // explicitly instead of applying one unshifted selector to both.
    Result.High = VBLENDOpImpl(OpSize::i128Bit, ElementSize, Src1.High, Src2.High, ZeroRegister, Selector >> SelectorShift);
  }
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

template<size_t ElementSize>
Expand Down
6 changes: 5 additions & 1 deletion unittests/ASM/VEX/vblendps.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x55555555FFFFFFFF", "0x7777777788888888"],
"XMM7": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0xEEEEEEEE66666666", "0x9999999988888888"],
"XMM8": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
"XMM9": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0x0000000000000000", "0x0000000000000000"]
"XMM9": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x1111111122222222", "0xccccccccdddddddd", "0xeeeeeeeeffffffff", "0x9999999988888888"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -36,6 +37,9 @@ vblendps ymm7, ymm0, ymm1, 0b01010101
vblendps xmm8, xmm0, xmm1, 0b10101010
vblendps xmm9, xmm0, xmm1, 0b01010101

; Different sources between upper and lower selectors
vblendps ymm10, ymm0, ymm1, 0x3

hlt

align 32
Expand Down
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpslld_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x6364000067680000", "0x7374000077780000", "0x6364000067680000", "0x7374000077780000"],
"XMM8": ["0x4243440046474800", "0x5253540056575800", "0x4243440046474800", "0x5253540056575800"],
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
"XMM10": ["0x848688008c8e9000", "0xa4a6a800acaeb000", "0xc4c6c800ccced000", "0xe4e6e800eceef000"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -21,6 +22,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

vpslld xmm2, xmm0, 32
vpslld xmm3, xmm1, 16
Expand All @@ -32,6 +34,8 @@ vpslld ymm7, ymm1, 16
vpslld ymm8, ymm0, 8
vpslld ymm9, ymm1, 1

vpslld ymm10, ymm10, 0x9

hlt

align 32
Expand All @@ -45,3 +49,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsllq_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x4546474800000000", "0x5556575800000000", "0x4546474800000000", "0x5556575800000000"],
"XMM7": ["0x6364656667680000", "0x7374757677780000", "0x6364656667680000", "0x7374757677780000"],
"XMM8": ["0x4243444546474800", "0x5253545556575800", "0x4243444546474800", "0x5253545556575800"],
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
"XMM10": ["0x8486888a8c8e9000", "0xa4a6a8aaacaeb000", "0xc4c6c8caccced000", "0xe4e6e8eaeceef000"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -21,6 +22,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

vpsllq xmm2, xmm0, 32
vpsllq xmm3, xmm1, 16
Expand All @@ -32,6 +34,8 @@ vpsllq ymm7, ymm1, 16
vpsllq ymm8, ymm0, 8
vpsllq ymm9, ymm1, 1

vpsllq ymm10, ymm10, 0x9

hlt

align 32
Expand All @@ -45,3 +49,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsllw_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x4200440046004800", "0x5200540056005800", "0x4200440046004800", "0x5200540056005800"],
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
"XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
"XMM10": ["0x840088008c009000", "0xa400a800ac00b000", "0xc400c800cc00d000", "0xe400e800ec00f000"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -21,6 +22,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

vpsllw xmm2, xmm0, 32
vpsllw xmm3, xmm1, 16
Expand All @@ -32,6 +34,8 @@ vpsllw ymm7, ymm1, 16
vpsllw ymm8, ymm0, 8
vpsllw ymm9, ymm1, 1

vpsllw ymm10, ymm10, 0x9

hlt

align 32
Expand All @@ -45,3 +49,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsrad_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
"XMM9": ["0x0000616200006566", "0x0000717200007576", "0x0000616200006566", "0x0000717200007576"],
"XMM10": ["0x0041424300454647", "0x0051525300555657", "0x0041424300454647", "0x0051525300555657"],
"XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
"XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
"XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
"XMM13": ["0x0020a1210022a323", "0x0028a929002aab2b", "0x0030b1310032b333", "0x0038b939003abb3b"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -24,6 +25,7 @@ lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm13, [rel .data2]

vpsrad xmm3, xmm0, 32
vpsrad xmm4, xmm1, 16
Expand All @@ -37,6 +39,8 @@ vpsrad ymm10, ymm0, 8
vpsrad ymm11, ymm1, 1
vpsrad ymm12, ymm2, 32

vpsrad ymm13, ymm13, 0x9

hlt

align 32
Expand All @@ -55,3 +59,6 @@ dq 0x8000800080008000
dq 0x7000700070007000
dq 0x8000800080008000
dq 0x7000700070007000

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
8 changes: 7 additions & 1 deletion unittests/ASM/VEX/vpsraw_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
"XMM9": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x0041004300450047", "0x0051005300550057", "0x0041004300450047", "0x0051005300550057"],
"XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
"XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
"XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
"XMM13": ["0x0020002100220023", "0x00280029002a002b", "0x0030003100320033", "0x00380039003a003b"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -24,6 +25,7 @@ lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm13, [rel .data2]

vpsraw xmm3, xmm0, 32
vpsraw xmm4, xmm1, 16
Expand All @@ -36,6 +38,7 @@ vpsraw ymm9, ymm1, 16
vpsraw ymm10, ymm0, 8
vpsraw ymm11, ymm1, 1
vpsraw ymm12, ymm2, 16
vpsraw ymm13, ymm13, 0x9

hlt

Expand All @@ -55,3 +58,6 @@ dq 0x8000800080008000
dq 0x7000700070007000
dq 0x8000800080008000
dq 0x7000700070007000

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsrld_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x0000616200006566", "0x0000717200007576", "0x0000616200006566", "0x0000717200007576"],
"XMM8": ["0x0041424300454647", "0x0051525300555657", "0x0041424300454647", "0x0051525300555657"],
"XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
"XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
"XMM10": ["0x0020a1210022a323", "0x0028a929002aab2b", "0x0030b1310032b333", "0x0038b939003abb3b"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -21,6 +22,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

vpsrld xmm2, xmm0, 32
vpsrld xmm3, xmm1, 16
Expand All @@ -32,6 +34,8 @@ vpsrld ymm7, ymm1, 16
vpsrld ymm8, ymm0, 8
vpsrld ymm9, ymm1, 1

vpsrld ymm10, ymm10, 0x9

hlt

align 32
Expand All @@ -45,3 +49,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsrlq_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
"XMM8": ["0x0000000041424344", "0x0000000051525354", "0x0000000041424344", "0x0000000051525354"],
"XMM9": ["0x0000616263646566", "0x0000717273747576", "0x0000616263646566", "0x0000717273747576"],
"XMM10": ["0x0041424344454647", "0x0051525354555657", "0x0041424344454647", "0x0051525354555657"],
"XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
"XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
"XMM12": ["0x0020a121a222a323", "0x0028a929aa2aab2b", "0x0030b131b232b333", "0x0038b939ba3abb3b"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -23,6 +24,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm12, [rel .data2]

vpsrlq xmm2, xmm0, 64
vpsrlq xmm3, xmm0, 32
Expand All @@ -36,6 +38,8 @@ vpsrlq ymm9, ymm1, 16
vpsrlq ymm10, ymm0, 8
vpsrlq ymm11, ymm1, 1

vpsrlq ymm12, ymm12, 0x9

hlt

align 32
Expand All @@ -49,3 +53,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
9 changes: 8 additions & 1 deletion unittests/ASM/VEX/vpsrlw_imm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x0041004300450047", "0x0051005300550057", "0x0041004300450047", "0x0051005300550057"],
"XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
"XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
"XMM10": ["0x0020002100220023", "0x00280029002a002b", "0x0030003100320033", "0x00380039003a003b"]
},
"MemoryRegions": {
"0x100000000": "4096"
Expand All @@ -21,6 +22,7 @@ lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

vpsrlw xmm2, xmm0, 32
vpsrlw xmm3, xmm1, 16
Expand All @@ -32,6 +34,8 @@ vpsrlw ymm7, ymm1, 16
vpsrlw ymm8, ymm0, 8
vpsrlw ymm9, ymm1, 1

vpsrlw ymm10, ymm10, 0x9

hlt

align 32
Expand All @@ -45,3 +49,6 @@ dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778
Loading

0 comments on commit 739ac0f

Please sign in to comment.