Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle vrot overlap and vscl/vmscl prefixes more accurately #16302

Merged
merged 2 commits into from
Oct 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions Core/MIPS/IR/IRCompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1185,8 +1185,8 @@ namespace MIPSComp {
int vt = _VT;
u8 sregs[4], dregs[4], treg;
GetVectorRegsPrefixS(sregs, sz, vs);
// TODO: Prefixes seem strange...
GetVectorRegsPrefixT(&treg, V_Single, vt);
// T prefixes handled by interp.
GetVectorRegs(&treg, V_Single, vt);
GetVectorRegsPrefixD(dregs, sz, vd);

bool overlap = false;
Expand Down Expand Up @@ -1848,33 +1848,54 @@ namespace MIPSComp {
int imm = (op >> 16) & 0x1f;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
int sineLane = (imm >> 2) & 3;
int cosineLane = imm & 3;
bool negSin = (imm & 0x10) ? true : false;
bool broadcastSine = sineLane == cosineLane;

char d[4] = { '0', '0', '0', '0' };
if (((imm >> 2) & 3) == (imm & 3)) {
if (broadcastSine) {
for (int i = 0; i < 4; i++)
d[i] = 's';
}
d[(imm >> 2) & 3] = 's';
d[imm & 3] = 'c';
d[sineLane] = 's';
d[cosineLane] = 'c';

u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
u8 sreg[1];
GetVectorRegs(sreg, V_Single, vs);

// If there's overlap, sin is calculated without it, but cosine uses the result.
// This corresponds with prefix handling, where cosine doesn't get in prefixes.
if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) {
ir.Write(IROp::FSin, IRVTEMP_0, sreg[0]);
if (negSin)
ir.Write(IROp::FNeg, IRVTEMP_0, IRVTEMP_0);
}

for (int i = 0; i < n; i++) {
switch (d[i]) {
case '0':
ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(0.0f));
break;
case 's':
ir.Write(IROp::FSin, dregs[i], sreg[0]);
if (negSin) {
ir.Write(IROp::FNeg, dregs[i], dregs[i]);
if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) {
ir.Write(IROp::FMov, dregs[i], IRVTEMP_0);
} else {
ir.Write(IROp::FSin, dregs[i], sreg[0]);
if (negSin) {
ir.Write(IROp::FNeg, dregs[i], dregs[i]);
}
}
break;
case 'c':
ir.Write(IROp::FCos, dregs[i], sreg[0]);
if (IsOverlapSafe(n, dregs, 1, sreg))
ir.Write(IROp::FCos, dregs[i], sreg[0]);
else if (dregs[sineLane] == sreg[0])
ir.Write(IROp::FCos, dregs[i], IRVTEMP_0);
else
ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(1.0f));
break;
}
}
Expand Down
28 changes: 24 additions & 4 deletions Core/MIPS/MIPSIntVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,10 @@ namespace MIPSInt
ApplySwizzleS(&s[(n - 1) * 4], V_Quad);
// T prefix applies only for the last row, and is used per element.
// This is like vscl, but instead of zzzz it uses xxxx.
int tlane = (vt >> 5) & 3;
t[tlane] = t[0];
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0);
u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

for (int b = 0; b < n; b++) {
Expand Down Expand Up @@ -1518,9 +1520,10 @@ namespace MIPSInt

// T prefix forces swizzle (zzzz for some reason, so we force V_Quad.)
// That means negate still works, but constants are a bit weird.
t[2] = V(vt);
int tlane = (vt >> 5) & 3;
t[tlane] = V(vt);
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_SWIZZLE(2, 2, 2, 2);
u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

int n = GetNumVectorElements(sz);
Expand Down Expand Up @@ -1607,7 +1610,24 @@ namespace MIPSInt
} else {
d[sineLane] = sine;
}
d[cosineLane] = cosine;

if (((vd >> 2) & 7) == ((vs >> 2) & 7)) {
u8 dregs[4]{};
GetVectorRegs(dregs, sz, vd);
// Calculate cosine based on sine/zero result.
bool written = false;
for (int i = 0; i < 4; i++) {
if (vs == dregs[i]) {
d[cosineLane] = vfpu_cos(d[i]);
written = true;
break;
}
}
if (!written)
d[cosineLane] = cosine;
} else {
d[cosineLane] = cosine;
}

// D prefix works, just not for x.
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFEFC;
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/MIPSVFPUUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
}

int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) {
// Different matrices? Can't overlap, return early.
if (((vec1 >> 2) & 7) != ((vec2 >> 2) & 7))
return 0;

int n1 = GetNumVectorElements(size1);
int n2 = GetNumVectorElements(size2);
u8 regs1[4];
Expand Down