Skip to content

Commit

Permalink
Jit: Correct vocp prefix handling.
Browse files (browse the repository at this point in the history)
See hrydgard#5549.  Matches tests for various prefix settings.
  • Loading branch information
unknownbrackets committed Feb 23, 2019
1 parent c6c2260 commit 6178a1f
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 23 deletions.
15 changes: 11 additions & 4 deletions Core/MIPS/ARM/ArmCompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2301,8 +2301,16 @@ namespace MIPSComp
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);

MIPSReg tempregs[4];
Expand All @@ -2314,10 +2322,9 @@ namespace MIPSComp
}
}

MOVI2F(S0, 1.0f, SCRATCHREG1);
for (int i = 0; i < n; ++i) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
VADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
}

for (int i = 0; i < n; ++i) {
Expand Down
11 changes: 10 additions & 1 deletion Core/MIPS/ARM/ArmCompVFPUNEON.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1433,12 +1433,21 @@ void ArmJit::CompNEON_Vocp(MIPSOpcode op) {
DISABLE;
}

// TODO: Handle T prefix. Right now it uses 1.0f always.

// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

MappedRegs regs = NEONMapDirtyIn(op, sz, sz);
MOVI2F_neon(Q0, 1.0f, R0);
VSUB(F_32, regs.vd, Q0, regs.vs);
VADD(F_32, regs.vd, Q0, regs.vs);
NEONApplyPrefixD(regs.vd);

fpr.ReleaseSpillLocksAndDiscardTemps();
Expand Down
14 changes: 11 additions & 3 deletions Core/MIPS/ARM64/Arm64CompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1952,8 +1952,16 @@ namespace MIPSComp {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);

MIPSReg tempregs[4];
Expand All @@ -1967,8 +1975,8 @@ namespace MIPSComp {

fp.MOVI2F(S0, 1.0f, SCRATCH1);
for (int i = 0; i < n; ++i) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
fp.FADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
}

for (int i = 0; i < n; ++i) {
Expand Down
16 changes: 12 additions & 4 deletions Core/MIPS/IR/IRCompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1846,22 +1846,30 @@ namespace MIPSComp {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
// There are no bits for t, so just reuse s. It'll be constants only.
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);

u8 tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], n, sregs)) {
tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things
tempregs[i] = IRVTEMP_0 + i;
} else {
tempregs[i] = dregs[i];
}
}

ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f));
for (int i = 0; i < n; ++i) {
ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]);
ir.Write(IROp::FAdd, tempregs[i], tregs[i], sregs[i]);
}
for (int i = 0; i < n; ++i) {
if (dregs[i] != tempregs[i]) {
Expand Down
20 changes: 14 additions & 6 deletions Core/MIPS/MIPSIntVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,16 +544,24 @@ namespace MIPSInt

void Int_Vocp(MIPSOpcode op)
{
float s[4], d[4];
float s[4], t[4], d[4];
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
ReadVector(s, sz, vs);
ApplySwizzleS(s, sz);
for (int i = 0; i < GetNumVectorElements(sz); i++)
{
// Always positive NaN.
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : 1.0f - s[i];

// S prefix forces the negate flags.
u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
ApplyPrefixST(s, sprefix | 0x000F0000, sz);

// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000055 | 0x0000F000, sz);

for (int i = 0; i < GetNumVectorElements(sz); i++) {
// Always positive NaN. Note that s is always negated from the registers.
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
}
ApplyPrefixD(d, sz);
WriteVector(d, sz, vd);
Expand Down
28 changes: 23 additions & 5 deletions Core/MIPS/x86/CompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2026,12 +2026,24 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);

u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;

u8 sregs[4], tregs[4], dregs[4];
// Actually uses the T prefixes (despite being VS.)
GetVectorRegsPrefixS(sregs, sz, _VS);
if (js.prefixT != 0x0000F055)
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);

// Flush SIMD.
fpr.SimpleRegsV(sregs, sz, 0);
if (js.prefixT != 0x0000F055)
fpr.SimpleRegsV(tregs, sz, 0);
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

X64Reg tempxregs[4];
Expand All @@ -2048,11 +2060,17 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
}
}

MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
MOVSS(XMM1, MatR(TEMPREG));
if (js.prefixT == 0x0000F055) {
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
MOVSS(XMM1, MatR(TEMPREG));
}
for (int i = 0; i < n; ++i) {
MOVSS(XMM0, R(XMM1));
SUBSS(XMM0, fpr.V(sregs[i]));
if (js.prefixT == 0x0000F055) {
MOVSS(XMM0, R(XMM1));
} else {
MOVSS(XMM0, fpr.V(tregs[i]));
}
ADDSS(XMM0, fpr.V(sregs[i]));
MOVSS(tempxregs[i], R(XMM0));
}

Expand Down

0 comments on commit 6178a1f

Please sign in to comment.