From 3f997518f36097cf36570a8a7eca8b9b0997b26e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 29 Oct 2022 22:24:05 -0700 Subject: [PATCH 1/2] irjit: Handle vrot overlap more correctly. Sine ignores overlap, cosine does not. --- Core/MIPS/IR/IRCompVFPU.cpp | 35 ++++++++++++++++++++++++++++------- Core/MIPS/MIPSIntVFPU.cpp | 19 ++++++++++++++++++- Core/MIPS/MIPSVFPUUtils.cpp | 4 ++++ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 4fffc6765dfc..42cc1970a178 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1848,33 +1848,54 @@ namespace MIPSComp { int imm = (op >> 16) & 0x1f; VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); + int sineLane = (imm >> 2) & 3; + int cosineLane = imm & 3; bool negSin = (imm & 0x10) ? true : false; + bool broadcastSine = sineLane == cosineLane; char d[4] = { '0', '0', '0', '0' }; - if (((imm >> 2) & 3) == (imm & 3)) { + if (broadcastSine) { for (int i = 0; i < 4; i++) d[i] = 's'; } - d[(imm >> 2) & 3] = 's'; - d[imm & 3] = 'c'; + d[sineLane] = 's'; + d[cosineLane] = 'c'; u8 dregs[4]; GetVectorRegs(dregs, sz, vd); u8 sreg[1]; GetVectorRegs(sreg, V_Single, vs); + + // If there's overlap, sin is calculated without it, but cosine uses the result. + // This corresponds with prefix handling, where cosine doesn't get in prefixes. + if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) { + ir.Write(IROp::FSin, IRVTEMP_0, sreg[0]); + if (negSin) + ir.Write(IROp::FNeg, IRVTEMP_0, IRVTEMP_0); + } + for (int i = 0; i < n; i++) { switch (d[i]) { case '0': ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(0.0f)); break; case 's': - ir.Write(IROp::FSin, dregs[i], sreg[0]); - if (negSin) { - ir.Write(IROp::FNeg, dregs[i], dregs[i]); + if (broadcastSine || !IsOverlapSafe(n, dregs, 1, sreg)) { + ir.Write(IROp::FMov, dregs[i], IRVTEMP_0); + } else { + ir.Write(IROp::FSin, dregs[i], sreg[0]); + if (negSin) { + ir.Write(IROp::FNeg, dregs[i], dregs[i]); + } } break; case 'c': - ir.Write(IROp::FCos, dregs[i], sreg[0]); + if (IsOverlapSafe(n, dregs, 1, sreg)) + ir.Write(IROp::FCos, dregs[i], sreg[0]); + else if (dregs[sineLane] == sreg[0]) + ir.Write(IROp::FCos, dregs[i], IRVTEMP_0); + else + ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(1.0f)); break; } } diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index c8df6b21dc36..5e6d5ad54cd7 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -1607,7 +1607,24 @@ namespace MIPSInt } else { d[sineLane] = sine; } - d[cosineLane] = cosine; + + if (((vd >> 2) & 7) == ((vs >> 2) & 7)) { + u8 dregs[4]{}; + GetVectorRegs(dregs, sz, vd); + // Calculate cosine based on sine/zero result. + bool written = false; + for (int i = 0; i < 4; i++) { + if (vs == dregs[i]) { + d[cosineLane] = vfpu_cos(d[i]); + written = true; + break; + } + } + if (!written) + d[cosineLane] = cosine; + } else { + d[cosineLane] = cosine; + } // D prefix works, just not for x. currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFEFC; diff --git a/Core/MIPS/MIPSVFPUUtils.cpp b/Core/MIPS/MIPSVFPUUtils.cpp index 0f1d6688abe4..e7f7bf7a7f35 100644 --- a/Core/MIPS/MIPSVFPUUtils.cpp +++ b/Core/MIPS/MIPSVFPUUtils.cpp @@ -343,6 +343,10 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) { } int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) { + // Different matrices? Can't overlap, return early. + if (((vec1 >> 2) & 7) != ((vec2 >> 2) & 7)) + return 0; + int n1 = GetNumVectorElements(size1); int n2 = GetNumVectorElements(size2); u8 regs1[4]; From bbdc8a8f9847ba777056e8f11f9406cc1c8e18c0 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 29 Oct 2022 22:43:30 -0700 Subject: [PATCH 2/2] interp: Correct vscl/vmscl t prefix handling. This makes more sense. Fixes Dissidia 012 issues. --- Core/MIPS/IR/IRCompVFPU.cpp | 4 ++-- Core/MIPS/MIPSIntVFPU.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 42cc1970a178..4694986cf75f 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1185,8 +1185,8 @@ namespace MIPSComp { int vt = _VT; u8 sregs[4], dregs[4], treg; GetVectorRegsPrefixS(sregs, sz, vs); - // TODO: Prefixes seem strange... - GetVectorRegsPrefixT(&treg, V_Single, vt); + // T prefixes handled by interp. + GetVectorRegs(&treg, V_Single, vt); GetVectorRegsPrefixD(dregs, sz, vd); bool overlap = false; diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index 5e6d5ad54cd7..bc1120921e38 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -540,8 +540,10 @@ namespace MIPSInt ApplySwizzleS(&s[(n - 1) * 4], V_Quad); // T prefix applies only for the last row, and is used per element. // This is like vscl, but instead of zzzz it uses xxxx. + int tlane = (vt >> 5) & 3; + t[tlane] = t[0]; u32 tprefixRemove = VFPU_ANY_SWIZZLE(); - u32 tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0); + u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane); ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad); for (int b = 0; b < n; b++) { @@ -1518,9 +1520,10 @@ namespace MIPSInt // T prefix forces swizzle (zzzz for some reason, so we force V_Quad.) // That means negate still works, but constants are a bit weird. - t[2] = V(vt); + int tlane = (vt >> 5) & 3; + t[tlane] = V(vt); u32 tprefixRemove = VFPU_ANY_SWIZZLE(); - u32 tprefixAdd = VFPU_SWIZZLE(2, 2, 2, 2); + u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane); ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad); int n = GetNumVectorElements(sz);