Skip to content

Commit

Permalink
Merge pull request #16302 from unknownbrackets/vrot-overlap
Browse files (browse the repository at this point in the history)
Handle vrot overlap and vscl/vmscl prefixes more accurately
  • Loading branch information
hrydgard authored Oct 30, 2022
2 parents 1d97e7d + bbdc8a8 commit ba32ef5
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 13 deletions.
39 changes: 30 additions & 9 deletions Core/MIPS/IR/IRCompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1185,8 +1185,8 @@ namespace MIPSComp {
int vt = _VT;
u8 sregs[4], dregs[4], treg;
GetVectorRegsPrefixS(sregs, sz, vs);
// TODO: Prefixes seem strange...
GetVectorRegsPrefixT(&treg, V_Single, vt);
// T prefixes handled by interp.
GetVectorRegs(&treg, V_Single, vt);
GetVectorRegsPrefixD(dregs, sz, vd);

bool overlap = false;
Expand Down Expand Up @@ -1848,33 +1848,54 @@ namespace MIPSComp {
int imm = (op >> 16) & 0x1f;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
int sineLane = (imm >> 2) & 3;
int cosineLane = imm & 3;
bool negSin = (imm & 0x10) ? true : false;
bool broadcastSine = sineLane == cosineLane;

char d[4] = { '0', '0', '0', '0' };
if (((imm >> 2) & 3) == (imm & 3)) {
if (broadcastSine) {
for (int i = 0; i < 4; i++)
d[i] = 's';
}
d[(imm >> 2) & 3] = 's';
d[imm & 3] = 'c';
d[sineLane] = 's';
d[cosineLane] = 'c';

u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
u8 sreg[1];
GetVectorRegs(sreg, V_Single, vs);

// If there's overlap, sin is calculated without it, but cosine uses the result.
// This corresponds with prefix handling, where cosine doesn't get in prefixes.
if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) {
ir.Write(IROp::FSin, IRVTEMP_0, sreg[0]);
if (negSin)
ir.Write(IROp::FNeg, IRVTEMP_0, IRVTEMP_0);
}

for (int i = 0; i < n; i++) {
switch (d[i]) {
case '0':
ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(0.0f));
break;
case 's':
ir.Write(IROp::FSin, dregs[i], sreg[0]);
if (negSin) {
ir.Write(IROp::FNeg, dregs[i], dregs[i]);
if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) {
ir.Write(IROp::FMov, dregs[i], IRVTEMP_0);
} else {
ir.Write(IROp::FSin, dregs[i], sreg[0]);
if (negSin) {
ir.Write(IROp::FNeg, dregs[i], dregs[i]);
}
}
break;
case 'c':
ir.Write(IROp::FCos, dregs[i], sreg[0]);
if (IsOverlapSafe(n, dregs, 1, sreg))
ir.Write(IROp::FCos, dregs[i], sreg[0]);
else if (dregs[sineLane] == sreg[0])
ir.Write(IROp::FCos, dregs[i], IRVTEMP_0);
else
ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(1.0f));
break;
}
}
Expand Down
28 changes: 24 additions & 4 deletions Core/MIPS/MIPSIntVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,10 @@ namespace MIPSInt
ApplySwizzleS(&s[(n - 1) * 4], V_Quad);
// T prefix applies only for the last row, and is used per element.
// This is like vscl, but instead of zzzz it uses xxxx.
int tlane = (vt >> 5) & 3;
t[tlane] = t[0];
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0);
u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

for (int b = 0; b < n; b++) {
Expand Down Expand Up @@ -1518,9 +1520,10 @@ namespace MIPSInt

// T prefix forces swizzle (zzzz for some reason, so we force V_Quad.)
// That means negate still works, but constants are a bit weird.
t[2] = V(vt);
int tlane = (vt >> 5) & 3;
t[tlane] = V(vt);
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_SWIZZLE(2, 2, 2, 2);
u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

int n = GetNumVectorElements(sz);
Expand Down Expand Up @@ -1607,7 +1610,24 @@ namespace MIPSInt
} else {
d[sineLane] = sine;
}
d[cosineLane] = cosine;

if (((vd >> 2) & 7) == ((vs >> 2) & 7)) {
u8 dregs[4]{};
GetVectorRegs(dregs, sz, vd);
// Calculate cosine based on sine/zero result.
bool written = false;
for (int i = 0; i < 4; i++) {
if (vs == dregs[i]) {
d[cosineLane] = vfpu_cos(d[i]);
written = true;
break;
}
}
if (!written)
d[cosineLane] = cosine;
} else {
d[cosineLane] = cosine;
}

// D prefix works, just not for x.
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFEFC;
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/MIPSVFPUUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
}

int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) {
// Different matrices? Can't overlap, return early.
if (((vec1 >> 2) & 7) != ((vec2 >> 2) & 7))
return 0;

int n1 = GetNumVectorElements(size1);
int n2 = GetNumVectorElements(size2);
u8 regs1[4];
Expand Down

0 comments on commit ba32ef5

Please sign in to comment.