diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp index dc17a2be787d..089ace124cec 100644 --- a/Core/MIPS/ARM/ArmCompVFPU.cpp +++ b/Core/MIPS/ARM/ArmCompVFPU.cpp @@ -2301,8 +2301,16 @@ namespace MIPSComp VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); - u8 sregs[4], dregs[4]; + // This is a hack that modifies prefixes. We eat them later, so just overwrite. + // S prefix forces the negate flags. + js.prefixS |= 0x000F0000; + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000; + + u8 sregs[4], tregs[4], dregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); MIPSReg tempregs[4]; @@ -2314,10 +2322,9 @@ namespace MIPSComp } } - MOVI2F(S0, 1.0f, SCRATCHREG1); for (int i = 0; i < n; ++i) { - fpr.MapDirtyInV(tempregs[i], sregs[i]); - VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i])); + fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]); + VADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i])); } for (int i = 0; i < n; ++i) { diff --git a/Core/MIPS/ARM/ArmCompVFPUNEON.cpp b/Core/MIPS/ARM/ArmCompVFPUNEON.cpp index 945d9938495d..1b5ebbe939fb 100644 --- a/Core/MIPS/ARM/ArmCompVFPUNEON.cpp +++ b/Core/MIPS/ARM/ArmCompVFPUNEON.cpp @@ -1433,12 +1433,21 @@ void ArmJit::CompNEON_Vocp(MIPSOpcode op) { DISABLE; } + // TODO: Handle T prefix. Right now it uses 1.0f always. + + // This is a hack that modifies prefixes. We eat them later, so just overwrite. + // S prefix forces the negate flags. + js.prefixS |= 0x000F0000; + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000; + VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); MappedRegs regs = NEONMapDirtyIn(op, sz, sz); MOVI2F_neon(Q0, 1.0f, R0); - VSUB(F_32, regs.vd, Q0, regs.vs); + VADD(F_32, regs.vd, Q0, regs.vs); NEONApplyPrefixD(regs.vd); fpr.ReleaseSpillLocksAndDiscardTemps(); diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index a5e6a85fdc06..871c7e99ea79 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -1952,8 +1952,16 @@ namespace MIPSComp { VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); - u8 sregs[4], dregs[4]; + // This is a hack that modifies prefixes. We eat them later, so just overwrite. + // S prefix forces the negate flags. + js.prefixS |= 0x000F0000; + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000; + + u8 sregs[4], tregs[4], dregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); MIPSReg tempregs[4]; @@ -1967,8 +1975,8 @@ namespace MIPSComp { fp.MOVI2F(S0, 1.0f, SCRATCH1); for (int i = 0; i < n; ++i) { - fpr.MapDirtyInV(tempregs[i], sregs[i]); - fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i])); + fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]); + fp.FADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i])); } for (int i = 0; i < n; ++i) { diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index ef5f3d2282f4..f3019f9f5047 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1846,22 +1846,30 @@ namespace MIPSComp { VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); - u8 sregs[4], dregs[4]; + // This is a hack that modifies prefixes. We eat them later, so just overwrite. + // S prefix forces the negate flags. + js.prefixS |= 0x000F0000; + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000; + + u8 sregs[4], tregs[4], dregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); + // There's no bits for t, so just reuse s. It'll be constants only. + GetVectorRegsPrefixT(tregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); u8 tempregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafe(dregs[i], n, sregs)) { - tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things + tempregs[i] = IRVTEMP_0 + i; } else { tempregs[i] = dregs[i]; } } - ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f)); for (int i = 0; i < n; ++i) { - ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]); + ir.Write(IROp::FAdd, tempregs[i], tregs[i], sregs[i]); } for (int i = 0; i < n; ++i) { if (dregs[i] != tempregs[i]) { diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index b44cb3a92208..30ded3b73774 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -544,16 +544,24 @@ namespace MIPSInt void Int_Vocp(MIPSOpcode op) { - float s[4], d[4]; + float s[4], t[4], d[4]; int vd = _VD; int vs = _VS; VectorSize sz = GetVecSize(op); ReadVector(s, sz, vs); - ApplySwizzleS(s, sz); - for (int i = 0; i < GetNumVectorElements(sz); i++) - { - // Always positive NaN. - d[i] = my_isnan(s[i]) ? fabsf(s[i]) : 1.0f - s[i]; + + // S prefix forces the negate flags. + u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX]; + ApplyPrefixST(s, sprefix | 0x000F0000, sz); + + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX]; + ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000055 | 0x0000F000, sz); + + for (int i = 0; i < GetNumVectorElements(sz); i++) { + // Always positive NaN. Note that s is always negated from the registers. + d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i]; } ApplyPrefixD(d, sz); WriteVector(d, sz, vd); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index d74a1f8e3aa2..10a1e6e9bca4 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -2026,12 +2026,24 @@ void Jit::Comp_Vocp(MIPSOpcode op) { VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); - u8 sregs[4], dregs[4]; + // This is a hack that modifies prefixes. We eat them later, so just overwrite. + // S prefix forces the negate flags. + js.prefixS |= 0x000F0000; + // T prefix forces constants on and regnum to 1. + // That means negate still works, and abs activates a different constant. + js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000; + + u8 sregs[4], tregs[4], dregs[4]; + // Actually uses the T prefixes (despite being VS.) GetVectorRegsPrefixS(sregs, sz, _VS); + if (js.prefixT != 0x0000F055) + GetVectorRegsPrefixT(tregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); // Flush SIMD. fpr.SimpleRegsV(sregs, sz, 0); + if (js.prefixT != 0x0000F055) + fpr.SimpleRegsV(tregs, sz, 0); fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY); X64Reg tempxregs[4]; @@ -2048,11 +2060,17 @@ void Jit::Comp_Vocp(MIPSOpcode op) { } } - MOV(PTRBITS, R(TEMPREG), ImmPtr(&one)); - MOVSS(XMM1, MatR(TEMPREG)); + if (js.prefixT == 0x0000F055) { + MOV(PTRBITS, R(TEMPREG), ImmPtr(&one)); + MOVSS(XMM1, MatR(TEMPREG)); + } for (int i = 0; i < n; ++i) { - MOVSS(XMM0, R(XMM1)); - SUBSS(XMM0, fpr.V(sregs[i])); + if (js.prefixT == 0x0000F055) { + MOVSS(XMM0, R(XMM1)); + } else { + MOVSS(XMM0, fpr.V(tregs[i])); + } + ADDSS(XMM0, fpr.V(sregs[i])); MOVSS(tempxregs[i], R(XMM0)); }