Jit: Correct vocp prefix handling.

See hrydgard#5549. Matches tests for various prefix settings.
unknownbrackets · Feb 23, 2019 · 6178a1f
1 parent c6c2260
commit 6178a1f
Show file tree

Hide file tree

Showing 6 changed files with 81 additions and 23 deletions.
diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp
@@ -2301,8 +2301,16 @@ namespace MIPSComp
 		VectorSize sz = GetVecSize(op);
 		int n = GetNumVectorElements(sz);
 
-		u8 sregs[4], dregs[4];
+		// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
+		// S prefix forces the negate flags.
+		js.prefixS |= 0x000F0000;
+		// T prefix forces constants on and regnum to 1.
+		// That means negate still works, and abs activates a different constant.
+		js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
+
+		u8 sregs[4], tregs[4], dregs[4];
 		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixT(tregs, sz, _VS);
 		GetVectorRegsPrefixD(dregs, sz, _VD);
 
 		MIPSReg tempregs[4];
@@ -2314,10 +2322,9 @@ namespace MIPSComp
 			}
 		}
 
-		MOVI2F(S0, 1.0f, SCRATCHREG1);
 		for (int i = 0; i < n; ++i) {
-			fpr.MapDirtyInV(tempregs[i], sregs[i]);
-			VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
+			fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
+			VADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
 		}
 
 		for (int i = 0; i < n; ++i) {

diff --git a/Core/MIPS/ARM/ArmCompVFPUNEON.cpp b/Core/MIPS/ARM/ArmCompVFPUNEON.cpp
@@ -1433,12 +1433,21 @@ void ArmJit::CompNEON_Vocp(MIPSOpcode op) {
 		DISABLE;
 	}
 
+	// TODO: Handle T prefix.  Right now it uses 1.0f always.
+
+	// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
+	// S prefix forces the negate flags.
+	js.prefixS |= 0x000F0000;
+	// T prefix forces constants on and regnum to 1.
+	// That means negate still works, and abs activates a different constant.
+	js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
+
 	VectorSize sz = GetVecSize(op);
 	int n = GetNumVectorElements(sz);
 
 	MappedRegs regs = NEONMapDirtyIn(op, sz, sz);
 	MOVI2F_neon(Q0, 1.0f, R0);
-	VSUB(F_32, regs.vd, Q0, regs.vs);
+	VADD(F_32, regs.vd, Q0, regs.vs);
 	NEONApplyPrefixD(regs.vd);
 
 	fpr.ReleaseSpillLocksAndDiscardTemps();

diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
@@ -1952,8 +1952,16 @@ namespace MIPSComp {
 		VectorSize sz = GetVecSize(op);
 		int n = GetNumVectorElements(sz);
 
-		u8 sregs[4], dregs[4];
+		// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
+		// S prefix forces the negate flags.
+		js.prefixS |= 0x000F0000;
+		// T prefix forces constants on and regnum to 1.
+		// That means negate still works, and abs activates a different constant.
+		js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
+
+		u8 sregs[4], tregs[4], dregs[4];
 		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixT(tregs, sz, _VS);
 		GetVectorRegsPrefixD(dregs, sz, _VD);
 
 		MIPSReg tempregs[4];
@@ -1967,8 +1975,8 @@ namespace MIPSComp {
 
 		fp.MOVI2F(S0, 1.0f, SCRATCH1);
 		for (int i = 0; i < n; ++i) {
-			fpr.MapDirtyInV(tempregs[i], sregs[i]);
-			fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
+			fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
+			fp.FADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
 		}
 
 		for (int i = 0; i < n; ++i) {

diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp
@@ -1846,22 +1846,30 @@ namespace MIPSComp {
 		VectorSize sz = GetVecSize(op);
 		int n = GetNumVectorElements(sz);
 
-		u8 sregs[4], dregs[4];
+		// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
+		// S prefix forces the negate flags.
+		js.prefixS |= 0x000F0000;
+		// T prefix forces constants on and regnum to 1.
+		// That means negate still works, and abs activates a different constant.
+		js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
+
+		u8 sregs[4], tregs[4], dregs[4];
 		GetVectorRegsPrefixS(sregs, sz, _VS);
+		// There's no bits for t, so just reuse s.  It'll be constants only.
+		GetVectorRegsPrefixT(tregs, sz, _VS);
 		GetVectorRegsPrefixD(dregs, sz, _VD);
 
 		u8 tempregs[4];
 		for (int i = 0; i < n; ++i) {
 			if (!IsOverlapSafe(dregs[i], n, sregs)) {
-				tempregs[i] = IRVTEMP_PFX_T + i;   // using IRTEMP0 for other things
+				tempregs[i] = IRVTEMP_0 + i;
 			} else {
 				tempregs[i] = dregs[i];
 			}
 		}
 
-		ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f));
 		for (int i = 0; i < n; ++i) {
-			ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]);
+			ir.Write(IROp::FAdd, tempregs[i], tregs[i], sregs[i]);
 		}
 		for (int i = 0; i < n; ++i) {
 			if (dregs[i] != tempregs[i]) {

diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
@@ -544,16 +544,24 @@ namespace MIPSInt
 
 	void Int_Vocp(MIPSOpcode op)
 	{
-		float s[4], d[4];
+		float s[4], t[4], d[4];
 		int vd = _VD;
 		int vs = _VS;
 		VectorSize sz = GetVecSize(op);
 		ReadVector(s, sz, vs);
-		ApplySwizzleS(s, sz);
-		for (int i = 0; i < GetNumVectorElements(sz); i++)
-		{
-			// Always positive NaN.
-			d[i] = my_isnan(s[i]) ? fabsf(s[i]) : 1.0f - s[i];
+
+		// S prefix forces the negate flags.
+		u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
+		ApplyPrefixST(s, sprefix | 0x000F0000, sz);
+
+		// T prefix forces constants on and regnum to 1.
+		// That means negate still works, and abs activates a different constant.
+		u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
+		ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000055 | 0x0000F000, sz);
+
+		for (int i = 0; i < GetNumVectorElements(sz); i++) {
+			// Always positive NaN.  Note that s is always negated from the registers.
+			d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
 		}
 		ApplyPrefixD(d, sz);
 		WriteVector(d, sz, vd);

diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
@@ -2026,12 +2026,24 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
 	VectorSize sz = GetVecSize(op);
 	int n = GetNumVectorElements(sz);
 
-	u8 sregs[4], dregs[4];
+	// This is a hack that modifies prefixes.  We eat them later, so just overwrite.
+	// S prefix forces the negate flags.
+	js.prefixS |= 0x000F0000;
+	// T prefix forces constants on and regnum to 1.
+	// That means negate still works, and abs activates a different constant.
+	js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
+
+	u8 sregs[4], tregs[4], dregs[4];
+	// Actually uses the T prefixes (despite being VS.)
 	GetVectorRegsPrefixS(sregs, sz, _VS);
+	if (js.prefixT != 0x0000F055)
+		GetVectorRegsPrefixT(tregs, sz, _VS);
 	GetVectorRegsPrefixD(dregs, sz, _VD);
 
 	// Flush SIMD.
 	fpr.SimpleRegsV(sregs, sz, 0);
+	if (js.prefixT != 0x0000F055)
+		fpr.SimpleRegsV(tregs, sz, 0);
 	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
 
 	X64Reg tempxregs[4];
@@ -2048,11 +2060,17 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
 		}
 	}
 
-	MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
-	MOVSS(XMM1, MatR(TEMPREG));
+	if (js.prefixT == 0x0000F055) {
+		MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
+		MOVSS(XMM1, MatR(TEMPREG));
+	}
 	for (int i = 0; i < n; ++i) {
-		MOVSS(XMM0, R(XMM1));
-		SUBSS(XMM0, fpr.V(sregs[i]));
+		if (js.prefixT == 0x0000F055) {
+			MOVSS(XMM0, R(XMM1));
+		} else {
+			MOVSS(XMM0, fpr.V(tregs[i]));
+		}
+		ADDSS(XMM0, fpr.V(sregs[i]));
 		MOVSS(tempxregs[i], R(XMM0));
 	}