From b9eaee254a3532d7ba8f7a340a41c49325cdcaa7 Mon Sep 17 00:00:00 2001 From: water Date: Fri, 4 Mar 2022 21:23:29 -0500 Subject: [PATCH 01/12] temp --- game/common/vu.h | 23 + .../opengl_renderer/DirectRenderer2.cpp | 77 +- .../opengl_renderer/DirectRenderer2.h | 8 +- .../opengl_renderer/GenericProgram.cpp | 2022 ++--------------- .../opengl_renderer/GenericRenderer.cpp | 2 +- .../opengl_renderer/GenericRenderer.h | 12 +- .../graphics/opengl_renderer/MercRenderer.cpp | 2 +- 7 files changed, 280 insertions(+), 1866 deletions(-) diff --git a/game/common/vu.h b/game/common/vu.h index fa89e4a8ed..9c8e0bedff 100644 --- a/game/common/vu.h +++ b/game/common/vu.h @@ -353,6 +353,17 @@ struct alignas(16) Vf { } } + void ftoi4_check(Mask /*mask*/, const Vf& a) { + for (int i = 0; i < 3; i++) { + data[i] = a.data[i]; + } + + for (int i = 3; i < 4; i++) { + s32 val = a.data[i] * 16.f; + memcpy(&data[i], &val, 4); + } + } + void ftoi12(Mask mask, const Vf& a) { for (int i = 0; i < 4; i++) { if ((u64)mask & (1 << i)) { @@ -365,6 +376,18 @@ struct alignas(16) Vf { } } + void ftoi12_check(Mask mask, const Vf& a) { + for (int i = 0; i < 4; i++) { + if ((u64)mask & (1 << i)) { + if (std::isnan(a.data[i])) { + ASSERT(false); + } + s32 val = a.data[i] * 4096.f; + memcpy(&data[i], &val, 4); + } + } + } + void ftoi0(Mask mask, const Vf& a) { for (int i = 0; i < 4; i++) { if ((u64)mask & (1 << i)) { diff --git a/game/graphics/opengl_renderer/DirectRenderer2.cpp b/game/graphics/opengl_renderer/DirectRenderer2.cpp index b7c8bf5493..8ae2c61000 100644 --- a/game/graphics/opengl_renderer/DirectRenderer2.cpp +++ b/game/graphics/opengl_renderer/DirectRenderer2.cpp @@ -6,8 +6,8 @@ DirectRenderer2::DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, - const std::string& name) - : m_name(name) { + const std::string& name, bool use_ftoi_mod) + : m_name(name), m_use_ftoi_mod(use_ftoi_mod) { // allocate buffers m_vertices.vertices.resize(max_verts); m_vertices.indices.resize(max_inds); @@ 
-426,7 +426,11 @@ void DirectRenderer2::render_gif_data(const u8* data, handle_rgbaq_packed(data + offset); break; case GifTag::RegisterDescriptor::XYZF2: - handle_xyzf2_packed(data + offset, render_state, prof); + if (m_use_ftoi_mod) { + handle_xyzf2_mod_packed(data + offset, render_state, prof); + } else { + handle_xyzf2_packed(data + offset, render_state, prof); + } break; case GifTag::RegisterDescriptor::PRIM: ASSERT(false); // handle_prim_packed(data + offset, render_state, prof); @@ -717,6 +721,73 @@ void DirectRenderer2::handle_xyzf2_packed(const u8* data, vert.flags = m_state.vertex_flags; } +void DirectRenderer2::handle_xyzf2_mod_packed(const u8* data, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + if (m_vertices.close_to_full()) { + m_stats.flush_due_to_full++; + flush_pending(render_state, prof); + } + + float x; + float y; + memcpy(&x, data, 4); + memcpy(&y, data + 4, 4); + + u64 upper; + memcpy(&upper, data + 8, 8); + u32 zi = (upper >> 4) & 0xffffff; + float z; + memcpy(&z, &upper, 4); + + u8 f = (upper >> 36); + bool adc = !(upper & (1ull << 47)); + + if (m_state.next_vertex_starts_strip) { + m_state.next_vertex_starts_strip = false; + m_state.strip_warmup = 0; + } + + // push the vertex + auto& vert = m_vertices.vertices[m_vertices.next_vertex++]; + m_state.strip_warmup++; + if (adc && m_state.strip_warmup >= 3) { + m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 1; + m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 2; + m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 3; + } + + if (!m_current_state_has_open_draw) { + m_current_state_has_open_draw = true; + if (m_next_free_draw >= m_draw_buffer.size()) { + ASSERT(false); + } + // pick a texture unit to use + u8 tex_unit = 0; + if (m_next_free_draw > 0) { + tex_unit = (m_draw_buffer[m_next_free_draw - 1].tex_unit + 1) % TEX_UNITS; + } + auto& draw = m_draw_buffer[m_next_free_draw++]; + draw.mode = m_state.as_mode; 
+ draw.start_index = m_vertices.next_index; + draw.tbp = m_state.tbp; + draw.fix = m_state.gs_alpha.fix(); + // associate this draw with this texture unit. + draw.tex_unit = tex_unit; + m_state.tex_unit = tex_unit; + } + + // todo move to shader or something. + vert.xyz[0] = x * 16.f ; + vert.xyz[1] = y * 16.f; + vert.xyz[2] = z ; + vert.rgba = m_state.rgba; + vert.stq = math::Vector(m_state.s, m_state.t, m_state.Q); + vert.tex_unit = m_state.tex_unit; + vert.fog = f; + vert.flags = m_state.vertex_flags; +} + void DirectRenderer2::handle_alpha1(u64 val) { GsAlpha reg(val); if (m_state.gs_alpha != reg) { diff --git a/game/graphics/opengl_renderer/DirectRenderer2.h b/game/graphics/opengl_renderer/DirectRenderer2.h index e9dc6c7609..29369225f0 100644 --- a/game/graphics/opengl_renderer/DirectRenderer2.h +++ b/game/graphics/opengl_renderer/DirectRenderer2.h @@ -7,7 +7,7 @@ class DirectRenderer2 { public: - DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, const std::string& name); + DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, const std::string& name, bool use_ftoi_mod); void init_shaders(ShaderLibrary& shaders); void reset_state(); void render_gif_data(const u8* data, SharedRenderState* render_state, ScopedProfilerNode& prof); @@ -133,7 +133,13 @@ class DirectRenderer2 { // packed void handle_st_packed(const u8* data); void handle_rgbaq_packed(const u8* data); + void handle_xyzf2_packed(const u8* data, SharedRenderState* render_state, ScopedProfilerNode& prof); + + bool m_use_ftoi_mod = false; + void handle_xyzf2_mod_packed(const u8* data, + SharedRenderState* render_state, + ScopedProfilerNode& prof); }; diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index 6d67da416d..347042fb02 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -62,25 +62,28 @@ u16 clip(const Vf& vector, float val, u16 old_clip) { bool 
clipping_hack = true; +constexpr float kFogFloatOffset = 3071.f; + // clang-format off void GenericRenderer::mscal0() { // L4: // iaddiu vi01, vi00, 0x381 | nop vu.vi01 = 0x381; /* 897 */ // lq.xyzw vf01, 0(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf01, vu.vi01); + lq_buffer(Mask::xyzw, gen.fog, vu.vi01); // lq.xyzw vf02, 1(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf02, vu.vi01 + 1); + lq_buffer(Mask::xyzw, gen.adgif_tmpl, vu.vi01 + 1); // lq.xyzw vf03, 2(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf03, vu.vi01 + 2); + // lq_buffer(Mask::xyzw, vu.vf03, vu.vi01 + 2); + vu.vf03.fill(0); // lq.xyzw vf04, 3(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf04, vu.vi01 + 3); + lq_buffer(Mask::xyzw, gen.hvdf_off, vu.vi01 + 3); // lq.xyzw vf05, 4(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf05, vu.vi01 + 4); + lq_buffer(Mask::xyzw, gen.hmge_scale, vu.vi01 + 4); // lq.xyzw vf06, 5(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf06, vu.vi01 + 5); + // lq_buffer(Mask::xyzw, vu.vf06, vu.vi01 + 5); not used // lq.xyzw vf07, 6(vi01) | nop - lq_buffer(Mask::xyzw, vu.vf07, vu.vi01 + 6); + lq_buffer(Mask::xyzw, gen.guard, vu.vi01 + 6); // L5: // iaddiu vi13, vi00, 0x363 | nop vu.vi13 = 0x363; /* 867 */ @@ -169,269 +172,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S ASSERT(false); } - // BRANCH! - // b L4 | nop 0 - bc = true; - // nop | nop 1 - - if (bc) { goto L4; } - - // BRANCH! - // b L5 | nop 2 - bc = true; - // nop | nop 3 - - if (bc) { goto L5; } - - // BRANCH! - // b L84 | nop 4 - bc = true; - // nop | nop 5 - - if (bc) { goto L84; } - - // BRANCH! - // b L33 | nop 6 - bc = true; - // nop | nop 7 - - if (bc) { goto L33; } - - // BRANCH! - // b L8 | nop 8 - bc = true; - // nop | nop 9 - - if (bc) { goto L8; } - - // BRANCH! - // b L1 | nop 10 - bc = true; - // nop | nop 11 - - if (bc) { goto L1; } - - // BRANCH! 
- // b L6 | nop 12 - bc = true; - // nop | nop 13 - - if (bc) { goto L6; } - - L1: - // iaddiu vi02, vi00, 0x381 | nop 14 - vu.vi02 = 0x381; /* 897 */ - // lq.xyzw vf31, 7(vi02) | nop 15 - lq_buffer(Mask::xyzw, vu.vf31, vu.vi02 + 7); - // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 16 - vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ - // iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 17 - vu.vf23.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi13 = vu.vi13 + 0x1e; /* 30 */ - // BRANCH! - // ibne vi00, vi02, L2 | addw.z vf24, vf00, vf00 18 - vu.vf24.add(Mask::z, vu.vf00, vu.vf00.w()); bc = (vu.vi02 != 0); - // lq.xyzw vf03, 899(vi00) | addw.z vf25, vf00, vf00 19 - vu.vf25.add(Mask::z, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf03, 899); - if (bc) { goto L2; } - - // iaddiu vi13, vi00, 0x345 | nop 20 - vu.vi13 = 0x345; /* 837 */ - L2: - // ilw.x vi01, 5(vi13) | nop 21 - ilw_buffer(Mask::x, vu.vi01, vu.vi13 + 5); - // iaddi vi07, vi12, 0xa | nop 22 - vu.vi07 = vu.vi12 + 10; - // iaddi vi05, vi01, -0x1 | nop 23 - vu.vi05 = vu.vi01 + -1; - // lq.xyzw vf17, 4(vi13) | nop 24 - lq_buffer(Mask::xyzw, vu.vf17, vu.vi13 + 4); - // sq.xyzw vf31, 5(vi13) | nop 25 - sq_buffer(Mask::xyzw, vu.vf31, vu.vi13 + 5); - // sq.xyzw vf31, 6(vi13) | nop 26 - sq_buffer(Mask::xyzw, vu.vf31, vu.vi13 + 6); - // 0.0078125 | nop :i 27 - vu.I = 0.0078125; - // move.xyzw vf13, vf17 | muli.xyz vf17, vf17, I 28 - vu.vf17.mul(Mask::xyz, vu.vf17, vu.I); vu.vf13.move(Mask::xyzw, vu.vf17); - // move.xyzw vf14, vf17 | nop 29 - vu.vf14.move(Mask::xyzw, vu.vf17); - // move.xyzw vf15, vf17 | nop 30 - vu.vf15.move(Mask::xyzw, vu.vf17); - // move.xyzw vf16, vf17 | nop 31 - vu.vf16.move(Mask::xyzw, vu.vf17); - // sq.xyzw vf03, 4(vi13) | nop 32 - sq_buffer(Mask::xyzw, vu.vf03, vu.vi13 + 4); - // isw.w vi01, 5(vi13) | nop 33 - isw_buffer(Mask::w, vu.vi01, vu.vi13 + 5); - // isw.w vi00, 6(vi13) | nop 34 - isw_buffer(Mask::w, vu.vi00, vu.vi13 + 6); - L3: - // lq.xyz vf13, 
0(vi07) | nop 35 - lq_buffer(Mask::xyz, vu.vf13, vu.vi07); - // lq.xyz vf14, 3(vi07) | nop 36 - lq_buffer(Mask::xyz, vu.vf14, vu.vi07 + 3); - // lq.xyz vf15, 6(vi07) | nop 37 - lq_buffer(Mask::xyz, vu.vf15, vu.vi07 + 6); - // lq.xyz vf16, 9(vi07) | nop 38 - lq_buffer(Mask::xyz, vu.vf16, vu.vi07 + 9); - // iaddi vi07, vi07, 0xc | itof0.xyz vf13, vf13 39 - vu.vf13.itof0(Mask::xyz, vu.vf13); vu.vi07 = vu.vi07 + 12; - // iaddi vi05, vi05, -0x4 | itof0.xyz vf14, vf14 40 - vu.vf14.itof0(Mask::xyz, vu.vf14); vu.vi05 = vu.vi05 + -4; - // nop | itof0.xyz vf15, vf15 41 - vu.vf15.itof0(Mask::xyz, vu.vf15); - // nop | itof0.xyz vf16, vf16 42 - vu.vf16.itof0(Mask::xyz, vu.vf16); - // nop | mul.xyz vf13, vf13, vf17 43 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.vf17); - // nop | mul.xyz vf14, vf14, vf17 44 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.vf17); - // nop | mul.xyz vf15, vf15, vf17 45 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.vf17); - // nop | mul.xyz vf16, vf16, vf17 46 - vu.vf16.mul(Mask::xyz, vu.vf16, vu.vf17); - // lq.xyzw vf18, -11(vi07) | ftoi0.xyz vf13, vf13 47 - vu.vf13.ftoi0(Mask::xyz, vu.vf13); lq_buffer(Mask::xyzw, vu.vf18, vu.vi07 + -11); - // lq.xyzw vf19, -8(vi07) | ftoi0.xyz vf14, vf14 48 - vu.vf14.ftoi0(Mask::xyz, vu.vf14); lq_buffer(Mask::xyzw, vu.vf19, vu.vi07 + -8); - // lq.xyzw vf20, -5(vi07) | ftoi0.xyz vf15, vf15 49 - vu.vf15.ftoi0(Mask::xyz, vu.vf15); lq_buffer(Mask::xyzw, vu.vf20, vu.vi07 + -5); - // lq.xyzw vf21, -2(vi07) | ftoi0.xyz vf16, vf16 50 - vu.vf16.ftoi0(Mask::xyz, vu.vf16); lq_buffer(Mask::xyzw, vu.vf21, vu.vi07 + -2); - // sq.xyzw vf13, -12(vi07) | itof0.xyzw vf18, vf18 51 - vu.vf18.itof0(Mask::xyzw, vu.vf18); sq_buffer(Mask::xyzw, vu.vf13, vu.vi07 + -12); - // sq.xyzw vf14, -9(vi07) | itof0.xyzw vf19, vf19 52 - vu.vf19.itof0(Mask::xyzw, vu.vf19); sq_buffer(Mask::xyzw, vu.vf14, vu.vi07 + -9); - // sq.xyzw vf15, -6(vi07) | itof0.xyzw vf20, vf20 53 - vu.vf20.itof0(Mask::xyzw, vu.vf20); sq_buffer(Mask::xyzw, vu.vf15, vu.vi07 + -6); - // sq.xyzw vf16, 
-3(vi07) | itof0.xyzw vf21, vf21 54 - vu.vf21.itof0(Mask::xyzw, vu.vf21); sq_buffer(Mask::xyzw, vu.vf16, vu.vi07 + -3); - // sq.xyzw vf18, -11(vi07) | nop 55 - sq_buffer(Mask::xyzw, vu.vf18, vu.vi07 + -11); - // sq.xyzw vf19, -8(vi07) | nop 56 - sq_buffer(Mask::xyzw, vu.vf19, vu.vi07 + -8); - // sq.xyzw vf20, -5(vi07) | nop 57 - sq_buffer(Mask::xyzw, vu.vf20, vu.vi07 + -5); - // BRANCH! - // ibgez vi05, L3 | nop 58 - bc = ((s16)vu.vi05) >= 0; - // sq.xyzw vf21, -2(vi07) | nop 59 - sq_buffer(Mask::xyzw, vu.vf21, vu.vi07 + -2); - if (bc) { goto L3; } - - // BRANCH! - // b L16 | nop 60 - bc = true; - // nop | nop 61 - - if (bc) { goto L16; } - - L4: - // iaddiu vi01, vi00, 0x381 | nop 62 - vu.vi01 = 0x381; /* 897 */ - // lq.xyzw vf01, 0(vi01) | nop 63 - lq_buffer(Mask::xyzw, vu.vf01, vu.vi01); - // lq.xyzw vf02, 1(vi01) | nop 64 - lq_buffer(Mask::xyzw, vu.vf02, vu.vi01 + 1); - // lq.xyzw vf03, 2(vi01) | nop 65 - lq_buffer(Mask::xyzw, vu.vf03, vu.vi01 + 2); - // lq.xyzw vf04, 3(vi01) | nop 66 - lq_buffer(Mask::xyzw, vu.vf04, vu.vi01 + 3); - // lq.xyzw vf05, 4(vi01) | nop 67 - lq_buffer(Mask::xyzw, vu.vf05, vu.vi01 + 4); - // lq.xyzw vf06, 5(vi01) | nop 68 - lq_buffer(Mask::xyzw, vu.vf06, vu.vi01 + 5); - // lq.xyzw vf07, 6(vi01) | nop 69 - lq_buffer(Mask::xyzw, vu.vf07, vu.vi01 + 6); - L5: - // iaddiu vi13, vi00, 0x363 | nop 70 - vu.vi13 = 0x363; /* 867 */ - // iaddi vi02, vi13, 0x5 | nop 71 - vu.vi02 = vu.vi13 + 5; - // iaddi vi12, vi00, 0x0 | nop 72 - vu.vi12 = 0; - // isw.x vi02, 9(vi01) | nop 73 - isw_buffer(Mask::x, vu.vi02, vu.vi01 + 9); - // isw.y vi02, 9(vi01) | nop 74 - isw_buffer(Mask::y, vu.vi02, vu.vi01 + 9); - // sq.xyzw vf00, 907(vi00) | nop 75 - sq_buffer(Mask::xyzw, vu.vf00, 907); - // sq.xyzw vf00, 914(vi00) | nop 76 - sq_buffer(Mask::xyzw, vu.vf00, 914); - // sq.xyzw vf00, 921(vi00) | nop 77 - sq_buffer(Mask::xyzw, vu.vf00, 921); - // sq.xyzw vf00, 928(vi00) | nop 78 - sq_buffer(Mask::xyzw, vu.vf00, 928); - // sq.xyzw vf00, 935(vi00) | nop 79 - 
sq_buffer(Mask::xyzw, vu.vf00, 935); - // sq.xyzw vf00, 942(vi00) | nop 80 - sq_buffer(Mask::xyzw, vu.vf00, 942); - // iaddiu vi01, vi00, 0x40f | nop 81 - vu.vi01 = 0x40f; /* 1039 */ - // isw.z vi01, 907(vi00) | nop 82 - isw_buffer(Mask::z, vu.vi01, 907); - // iaddiu vi01, vi00, 0x411 | nop 83 - vu.vi01 = 0x411; /* 1041 */ - // isw.z vi01, 914(vi00) | nop 84 - isw_buffer(Mask::z, vu.vi01, 914); - // iaddiu vi01, vi00, 0x413 | nop 85 - vu.vi01 = 0x413; /* 1043 */ - // isw.z vi01, 921(vi00) | nop 86 - isw_buffer(Mask::z, vu.vi01, 921); - // iaddiu vi01, vi00, 0x415 | nop 87 - vu.vi01 = 0x415; /* 1045 */ - // isw.z vi01, 928(vi00) | nop 88 - isw_buffer(Mask::z, vu.vi01, 928); - // iaddiu vi01, vi00, 0x417 | nop 89 - vu.vi01 = 0x417; /* 1047 */ - // isw.z vi01, 935(vi00) | nop 90 - isw_buffer(Mask::z, vu.vi01, 935); - // iaddiu vi01, vi00, 0x419 | nop :e 91 - vu.vi01 = 0x419; /* 1049 */ - // isw.z vi01, 942(vi00) | nop 92 - isw_buffer(Mask::z, vu.vi01, 942); - return; - - L6: - // iaddiu vi01, vi00, 0x381 | nop 93 - vu.vi01 = 0x381; /* 897 */ - // ilw.z vi13, 9(vi01) | nop 94 - ilw_buffer(Mask::z, vu.vi13, vu.vi01 + 9); - // ilw.w vi12, 9(vi01) | nop 95 - ilw_buffer(Mask::w, vu.vi12, vu.vi01 + 9); - // iaddi vi02, vi13, 0x6 | nop 96 - vu.vi02 = vu.vi13 + 6; - // isw.x vi02, 9(vi01) | nop :e 97 - isw_buffer(Mask::x, vu.vi02, vu.vi01 + 9); - // isw.y vi02, 9(vi01) | nop 98 - isw_buffer(Mask::y, vu.vi02, vu.vi01 + 9); - return; - - // isubiu vi02, vi13, 0x363 | nop 99 - vu.vi02 = vu.vi13 - 0x363; /* 867 */ - // iaddiu vi13, vi13, 0x1e | nop 100 - vu.vi13 = vu.vi13 + 0x1e; /* 30 */ - // BRANCH! 
- // ibne vi00, vi02, L7 | nop 101 - bc = (vu.vi02 != 0); - // isubiu vi01, vi01, 0x100 | nop 102 - vu.vi01 = vu.vi01 - 0x100; /* 256 */ - if (bc) { goto L7; } - - // iaddiu vi13, vi00, 0x345 | nop 103 - vu.vi13 = 0x345; /* 837 */ - L7: - // iaddi vi03, vi13, 0x7 | nop 104 - vu.vi03 = vu.vi13 + 7; - // iaddi vi03, vi13, 0x7 | nop 105 - vu.vi03 = vu.vi13 + 7; - // isw.x vi03, 906(vi00) | nop 106 - isw_buffer(Mask::x, vu.vi03, 906); - // jr vi15 | nop 107 - ASSERT(false); - // isw.y vi03, 906(vi00) | nop 108 - isw_buffer(Mask::y, vu.vi03, 906); - L8: + L8: // R // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 109 vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ // iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 110 @@ -445,7 +186,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // iaddiu vi13, vi00, 0x345 | nop 113 vu.vi13 = 0x345; /* 837 */ - L9: + L9: // R // iaddi vi03, vi13, 0x7 | nop 114 vu.vi03 = vu.vi13 + 7; // ilw.w vi01, 5(vi13) | nop 115 @@ -455,16 +196,16 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // iaddi vi10, vi12, 0x9 | subw.w vf18, vf00, vf00 117 vu.vf18.sub(Mask::w, vu.vf00, vu.vf00.w()); vu.vi10 = vu.vi12 + 9; // lq.xyzw vf08, 0(vi13) | subw.w vf19, vf00, vf00 118 - vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf08, vu.vi13); + vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat0, vu.vi13); // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 119 - vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf09, vu.vi13 + 1); + vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 120 vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf10, vu.vi13 + 2); // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 121 // fmt::print("a: [{}] [{}]\n", vu.vf22.print(), vu.vf23.print()); - 
vu.vf22.ftoi12(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); + vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 122 - vu.vf23.ftoi12(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; + vu.vf23.ftoi12_check(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 123 vu.vf16.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; // iaddi vi11, vi00, -0x2 | sub.xyzw vf17, vf17, vf17 124 @@ -476,9 +217,9 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 127 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 128 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 129 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf17, 2(vi10) | nop 130 lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); // iand vi06, vi02, vi11 | nop 131 @@ -487,19 +228,19 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vf22.mfir(Mask::x, vu.vi06); // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 133 // fmt::print("b: [{}] [{}]\n", vu.vf24.print(), vu.vf25.print()); - vu.vf24.ftoi12(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; + vu.vf24.ftoi12_check(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; // isw.w vi12, 906(vi00) | ftoi12.z vf25, vf25 134 - vu.vf25.ftoi12(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); + vu.vf25.ftoi12_check(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); // nop | nop 135 // div Q, vf01.x, vf12.w | 
itof12.xyz vf18, vf22 136 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = vu.vf01.x() / vu.vf12.w(); + vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / vu.vf12.w(); // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 137 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 138 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 139 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 140 lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi07, vi03, vi11 | nop 141 @@ -513,17 +254,17 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | nop 145 // div Q, vf01.x, vf13.w | itof12.xyz vf19, vf23 146 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = vu.vf01.x() / vu.vf13.w(); + vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / vu.vf13.w(); // nop | add.xyzw vf12, vf12, vf04 147 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); + vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 148 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 149 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 150 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 151 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); 
lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); // iand vi08, vi04, vi11 | nop 152 vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 153 @@ -533,33 +274,33 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | mul.xyz vf19, vf19, Q 155 vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); // iaddi vi14, vi14, 0x9 | maxy.w vf12, vf12, vf01 156 - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); vu.vi14 = vu.vi14 + 9; + vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); vu.vi14 = vu.vi14 + 9; // fmt::print("vf12-1a: [{}]\n", vu.vf12.print()); -L10: +L10: // R // fmt::print("vf12-1b: [{}]\n", vu.vf12.print()); // div Q, vf01.x, vf14.w | itof12.xyz vf20, vf24 157 - vu.vf20.itof12(Mask::xyz, vu.vf24); vu.Q = vu.vf01.x() / vu.vf14.w(); + vu.vf20.itof12(Mask::xyz, vu.vf24); vu.Q = gen.fog.x() / vu.vf14.w(); // BRANCH! // ibeq vi02, vi06, L11 | add.xyzw vf13, vf13, vf04 158 - vu.vf13.add(Mask::xyzw, vu.vf13, vu.vf04); bc = (vu.vi02 == vu.vi06); + vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); bc = (vu.vi02 == vu.vi06); // mtir vi05, vf25.x | mulaw.xyzw ACC, vf11, vf00 159 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi05 = vu.vf25.x_as_u16(); if (bc) { goto L11; } // nop | addw.w vf12, vf12, vf01 160 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); - L11: + vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); + L11: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 161 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | madday.xyzw ACC, vf09, vf17 162 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | miniz.w vf13, vf13, vf01 163 - vu.vf13.mini(Mask::w, vu.vf13, 
vu.vf01.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // fmt::print("vf16 vertex [{}] @ \n", vu.vf16.print(), vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 164 - vu.vf12.ftoi4(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | maddz.xyzw vf15, vf10, vf17 165 vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf17.z()); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | mul.xyz vf14, vf14, Q 166 @@ -569,29 +310,29 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); bc = (vu.vi14 == vu.vi10); // fmt::print("store: {} {}\n", vu.vi10 - 10, vu.vf12.print_hex()); // sq.xyzw vf12, -10(vi10) | maxy.w vf13, vf13, vf01 168 - vu.vf13.max(Mask::w, vu.vf13, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); + vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf15.w | itof12.xyz vf21, vf25 169 - vu.vf21.itof12(Mask::xyz, vu.vf25); vu.Q = vu.vf01.x() / vu.vf15.w(); + vu.vf21.itof12(Mask::xyz, vu.vf25); vu.Q = gen.fog.x() / vu.vf15.w(); // BRANCH! 
// ibeq vi03, vi07, L12 | add.xyzw vf14, vf14, vf04 170 - vu.vf14.add(Mask::xyzw, vu.vf14, vu.vf04); bc = (vu.vi03 == vu.vi07); + vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); bc = (vu.vi03 == vu.vi07); // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 171 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); if (bc) { goto L12; } // nop | addw.w vf13, vf13, vf01 172 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); - L12: + vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); + L12: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 173 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 174 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf14, vf14, vf01 175 - vu.vf14.mini(Mask::w, vu.vf14, vu.vf01.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 176 - vu.vf13.ftoi4(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 177 vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vf22.mfir(Mask::x, vu.vi06); // fmt::print("vf12 transformed: [{}]\n", vu.vf12.print()); @@ -601,29 +342,29 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi14, vi10, L15 | mul.xyz vf21, vf21, Q 179 vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf13, -10(vi10) | maxy.w vf14, vf14, vf01 180 - vu.vf14.max(Mask::w, vu.vf14, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); + vu.vf14.max(Mask::w, vu.vf14, 
gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 181 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = vu.vf01.x() / vu.vf12.w(); + vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / vu.vf12.w(); // BRANCH! // ibeq vi04, vi08, L13 | add.xyzw vf15, vf15, vf04 182 - vu.vf15.add(Mask::xyzw, vu.vf15, vu.vf04); bc = (vu.vi04 == vu.vi08); + vu.vf15.add(Mask::xyzw, vu.vf15, gen.hvdf_off); bc = (vu.vi04 == vu.vi08); // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 183 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); if (bc) { goto L13; } // nop | addw.w vf14, vf14, vf01 184 - vu.vf14.add(Mask::w, vu.vf14, vu.vf01.w()); - L13: + vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); + L13: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 185 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 186 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | miniz.w vf15, vf15, vf01 187 - vu.vf15.mini(Mask::w, vu.vf15, vu.vf01.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf15.mini(Mask::w, vu.vf15, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 188 - vu.vf14.ftoi4(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; + vu.vf14.ftoi4_check(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 189 vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf17.z()); vu.vf23.mfir(Mask::x, vu.vi07); // sq.xyzw vf20, -12(vi10) | mul.xyz vf12, vf12, Q 190 @@ -632,29 +373,29 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi14, vi10, L15 | 
mul.xyz vf18, vf18, Q 191 vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf14, -10(vi10) | maxy.w vf15, vf15, vf01 192 - vu.vf15.max(Mask::w, vu.vf15, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf14, vu.vi10 + -10); + vu.vf15.max(Mask::w, vu.vf15, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf14, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf13.w | itof12.xyz vf19, vf23 193 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = vu.vf01.x() / vu.vf13.w(); + vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / vu.vf13.w(); // BRANCH! // ibeq vi05, vi09, L14 | add.xyzw vf12, vf12, vf04 194 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); bc = (vu.vi05 == vu.vi09); + vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); bc = (vu.vi05 == vu.vi09); // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 195 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); if (bc) { goto L14; } // nop | addw.w vf15, vf15, vf01 196 - vu.vf15.add(Mask::w, vu.vf15, vu.vf01.w()); - L14: + vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); + L14: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 197 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 198 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 199 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); // iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 200 - vu.vf15.ftoi4(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; + vu.vf15.ftoi4_check(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 
201 vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vf24.mfir(Mask::x, vu.vi08); // sq.xyzw vf21, -12(vi10) | mul.xyz vf13, vf13, Q 202 @@ -664,10 +405,10 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); bc = (vu.vi14 != vu.vi10); // sq.xyzw vf15, -10(vi10) | maxy.w vf12, vf12, vf01 204 // fmt::print("reloop {} {}\n", vu.vi14, vu.vi10); - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -10); + vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -10); if (bc) { goto L10; } - L15: + L15: // R // BRANCH! // b L82 | nop 205 bc = true; @@ -675,400 +416,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S ilw_buffer(Mask::w, vu.vi12, 906); if (bc) { goto L82; } - // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 207 - vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ - // iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 208 - vu.vf23.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi13 = vu.vi13 + 0x1e; /* 30 */ - // BRANCH! 
- // ibne vi00, vi02, L16 | addw.z vf24, vf00, vf00 209 - vu.vf24.add(Mask::z, vu.vf00, vu.vf00.w()); bc = (vu.vi02 != 0); - // nop | addw.z vf25, vf00, vf00 210 - vu.vf25.add(Mask::z, vu.vf00, vu.vf00.w()); - if (bc) { goto L16; } - - // iaddiu vi13, vi00, 0x345 | nop 211 - vu.vi13 = 0x345; /* 837 */ - L16: - // iaddi vi03, vi13, 0x7 | nop 212 - vu.vi03 = vu.vi13 + 7; - // ilw.w vi01, 5(vi13) | nop 213 - ilw_buffer(Mask::w, vu.vi01, vu.vi13 + 5); - // isw.x vi03, 906(vi00) | nop 214 - isw_buffer(Mask::x, vu.vi03, 906); - // iaddi vi10, vi12, 0x9 | subw.w vf18, vf00, vf00 215 - vu.vf18.sub(Mask::w, vu.vf00, vu.vf00.w()); vu.vi10 = vu.vi12 + 9; - // lq.xyzw vf08, 0(vi13) | subw.w vf19, vf00, vf00 216 - vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf08, vu.vi13); - // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 217 - vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf09, vu.vi13 + 1); - // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 218 - vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf10, vu.vi13 + 2); - // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 219 - vu.vf22.ftoi12(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); - // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 220 - vu.vf23.ftoi12(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; - // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 221 - vu.vf16.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; - // iaddi vi11, vi00, -0x2 | nop 222 - vu.vi11 = -2; - // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 223 - vu.vf24.ftoi12(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; - // isw.w vi12, 906(vi00) | ftoi12.z vf25, vf25 224 - vu.vf25.ftoi12(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); - // iaddi vi14, vi14, 0x9 | nop 225 - vu.vi14 = vu.vi14 + 9; - // lq.xy vf22, 0(vi10) | nop 226 - lq_buffer(Mask::xy, vu.vf22, vu.vi10); - // lq.xyz vf16, 2(vi10) | nop 227 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // nop | nop 228 - - // nop 
| nop 229 - - // nop | mulaw.xyzw ACC, vf11, vf00 230 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); - // mtir vi02, vf22.x | maddax.xyzw ACC, vf08, vf16 231 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi02 = vu.vf22.x_as_u16(); - // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 232 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf23, 0(vi10) | maddz.xyzw vf12, vf10, vf16 233 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); - // lq.xyz vf16, 2(vi10) | nop 234 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi06, vi02, vi11 | nop 235 - vu.vi06 = vu.vi02 & vu.vi11; - // mfir.x vf22, vi06 | nop 236 - vu.vf22.mfir(Mask::x, vu.vi06); - // nop | nop 237 - - // nop | nop 238 - - // nop | nop 239 - - // nop | itof12.xyz vf18, vf22 240 - vu.vf18.itof12(Mask::xyz, vu.vf22); - // div Q, vf01.x, vf12.w | mul.xyzw vf26, vf12, vf05 241 - vu.vf26.mul(Mask::xyzw, vu.vf12, vu.vf05); vu.Q = vu.vf01.x() / vu.vf12.w(); - // nop | nop 242 - - // nop | mulaw.xyzw ACC, vf11, vf00 243 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); - // mtir vi03, vf23.x | maddax.xyzw ACC, vf08, vf16 244 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi03 = vu.vf23.x_as_u16(); - // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 245 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf24, 0(vi10) | maddz.xyzw vf13, vf10, vf16 246 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); - // lq.xyz vf16, 2(vi10) | nop 247 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi07, vi03, vi11 | nop 248 - vu.vi07 = vu.vi03 & vu.vi11; - // mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q 249 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); - // fcset 0x0 | nop 250 - ASSERT(false); - // nop | nop 251 - - // nop | mul.xyz vf18, vf18, Q 252 - vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); - // nop | itof12.xyz 
vf19, vf23 253 - vu.vf19.itof12(Mask::xyz, vu.vf23); - // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 254 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf13.w(); - // nop | add.xyzw vf12, vf12, vf04 255 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); - // nop | maddax.xyzw ACC, vf08, vf16 256 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); - // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 257 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 258 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 259 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 260 - vu.vf27.mul(Mask::xyzw, vu.vf13, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi08, vi04, vi11 | nop 261 - vu.vi08 = vu.vi04 & vu.vi11; - // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 262 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); - // nop | maxy.w vf12, vf12, vf01 263 - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); - // nop | clipw.xyz vf26, vf26 264 - ASSERT(false); cf = clip(vu.vf26, vu.vf26.w(), cf); - // nop | mul.xyz vf19, vf19, Q 265 - vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); - // BRANCH! 
- // ibeq vi02, vi06, L17 | itof12.xyz vf20, vf24 266 - vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); - // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 267 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf14.w(); - if (bc) { goto L17; } - - // nop | addw.w vf12, vf12, vf01 268 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); - L17: - // nop | add.xyzw vf13, vf13, vf04 269 - vu.vf13.add(Mask::xyzw, vu.vf13, vu.vf04); - // nop | maddax.xyzw ACC, vf08, vf16 270 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); - // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 271 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 272 - vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 273 - vu.vf13.mini(Mask::w, vu.vf13, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 274 - vu.vf28.mul(Mask::xyzw, vu.vf14, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 275 - vu.vf12.ftoi4(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; - // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 276 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); - // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 277 - vu.vf13.max(Mask::w, vu.vf13, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L28 | clipw.xyz vf27, vf27 278 - ASSERT(false); cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q 279 - vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); - if (bc) { goto L28; } - - // BRANCH! 
- // ibeq vi03, vi07, L18 | itof12.xyz vf21, vf25 280 - vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); - // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 281 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf15.w(); - if (bc) { goto L18; } - - // nop | addw.w vf13, vf13, vf01 282 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); - L18: - // nop | add.xyzw vf14, vf14, vf04 283 - vu.vf14.add(Mask::xyzw, vu.vf14, vu.vf04); - // nop | maddax.xyzw ACC, vf08, vf16 284 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); - // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 285 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 286 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 287 - vu.vf14.mini(Mask::w, vu.vf14, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 288 - vu.vf29.mul(Mask::xyzw, vu.vf15, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 289 - vu.vf13.ftoi4(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; - // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 290 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); - // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 291 - vu.vf14.max(Mask::w, vu.vf14, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L28 | clipw.xyz vf28, vf28 292 - ASSERT(false); cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q 293 - vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); - if (bc) { goto L28; } - - L19: - // BRANCH! 
- // ibeq vi04, vi08, L20 | itof12.xyz vf18, vf22 294 - vu.vf18.itof12(Mask::xyz, vu.vf22); bc = (vu.vi04 == vu.vi08); - // div Q, vf01.x, vf12.w | mulaw.xyzw ACC, vf11, vf00 295 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf12.w(); - if (bc) { goto L20; } - - // nop | addw.w vf14, vf14, vf01 296 - vu.vf14.add(Mask::w, vu.vf14, vu.vf01.w()); - L20: - // fcand vi01, 0x3ffff | add.xyzw vf15, vf15, vf04 297 - vu.vf15.add(Mask::xyzw, vu.vf15, vu.vf04); ASSERT(false); vu.vi01 = cf & 0x3ffff; - - // BRANCH! - // ibne vi00, vi01, L31 | maddax.xyzw ACC, vf08, vf16 298 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); - // mtir vi03, vf23.x | madday.xyzw ACC, vf09, vf16 299 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi03 = vu.vf23.x_as_u16(); - if (bc) { goto L31; } - - L21: - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf13, vf10, vf16 300 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf24, 0(vi10) | miniz.w vf15, vf15, vf01 301 - vu.vf15.mini(Mask::w, vu.vf15, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf26, vf12, vf05 302 - vu.vf26.mul(Mask::xyzw, vu.vf12, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 303 - vu.vf14.ftoi4(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; - // mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q 304 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); - // sq.xyzw vf20, -12(vi10) | maxy.w vf15, vf15, vf01 305 - vu.vf15.max(Mask::w, vu.vf15, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L28 | clipw.xyz vf29, vf29 306 - ASSERT(false); cf = clip(vu.vf29, vu.vf29.w(), cf); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf14, -10(vi10) | mul.xyz vf18, vf18, Q 307 - vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); sq_buffer(Mask::xyzw, vu.vf14, vu.vi10 + -10); - if (bc) { goto L28; } - - // BRANCH! 
- // ibeq vi05, vi09, L22 | itof12.xyz vf19, vf23 308 - vu.vf19.itof12(Mask::xyz, vu.vf23); bc = (vu.vi05 == vu.vi09); - // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 309 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf13.w(); - if (bc) { goto L22; } - - // nop | addw.w vf15, vf15, vf01 310 - vu.vf15.add(Mask::w, vu.vf15, vu.vf01.w()); - L22: - // fcand vi01, 0x3ffff | add.xyzw vf12, vf12, vf04 311 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); ASSERT(false); vu.vi01 = cf & 0x3ffff; - - // BRANCH! - // ibne vi00, vi01, L32 | maddax.xyzw ACC, vf08, vf16 312 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); - // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 313 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); - if (bc) { goto L32; } - - L23: - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 314 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 315 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 316 - vu.vf27.mul(Mask::xyzw, vu.vf13, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 317 - vu.vf15.ftoi4(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; - // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 318 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); - // sq.xyzw vf21, -12(vi10) | maxy.w vf12, vf12, vf01 319 - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L28 | clipw.xyz vf26, vf26 320 - ASSERT(false); cf = clip(vu.vf26, vu.vf26.w(), cf); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf15, -10(vi10) | mul.xyz vf19, vf19, Q 321 - vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); sq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -10); - if (bc) { goto L28; } - - // BRANCH! 
- // ibeq vi02, vi06, L24 | itof12.xyz vf20, vf24 322 - vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); - // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 323 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf14.w(); - if (bc) { goto L24; } - - // nop | addw.w vf12, vf12, vf01 324 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); - L24: - // fcand vi01, 0x3ffff | add.xyzw vf13, vf13, vf04 325 - vu.vf13.add(Mask::xyzw, vu.vf13, vu.vf04); ASSERT(false); vu.vi01 = cf & 0x3ffff; - - // BRANCH! - // ibne vi00, vi01, L29 | maddax.xyzw ACC, vf08, vf16 326 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); - // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 327 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); - if (bc) { goto L29; } - - L25: - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 328 - vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 329 - vu.vf13.mini(Mask::w, vu.vf13, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 330 - vu.vf28.mul(Mask::xyzw, vu.vf14, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 331 - vu.vf12.ftoi4(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; - // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 332 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); - // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 333 - vu.vf13.max(Mask::w, vu.vf13, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L28 | clipw.xyz vf27, vf27 334 - ASSERT(false); cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q 335 - vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); - if (bc) { goto L28; } - - // BRANCH! 
- // ibeq vi03, vi07, L26 | itof12.xyz vf21, vf25 336 - vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); - // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 337 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf15.w(); - if (bc) { goto L26; } - - // nop | addw.w vf13, vf13, vf01 338 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); - L26: - // fcand vi01, 0x3ffff | add.xyzw vf14, vf14, vf04 339 - vu.vf14.add(Mask::xyzw, vu.vf14, vu.vf04); ASSERT(false); vu.vi01 = cf & 0x3ffff; - - // BRANCH! - // ibne vi00, vi01, L30 | maddax.xyzw ACC, vf08, vf16 340 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); - // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 341 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); - if (bc) { goto L30; } - - L27: - // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 342 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 343 - vu.vf14.mini(Mask::w, vu.vf14, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); - // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 344 - vu.vf29.mul(Mask::xyzw, vu.vf15, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 345 - vu.vf13.ftoi4(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; - // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 346 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); - // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 347 - vu.vf14.max(Mask::w, vu.vf14, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); - // BRANCH! 
- // ibne vi14, vi10, L19 | clipw.xyz vf28, vf28 348 - ASSERT(false); cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 != vu.vi10); - // sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q 349 - vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); - if (bc) { goto L19; } - - L28: - // BRANCH! - // b L82 | nop 350 - bc = true; - // ilw.w vi12, 906(vi00) | nop 351 - ilw_buffer(Mask::w, vu.vi12, 906); - if (bc) { goto L82; } - - L29: - // BRANCH! - // b L25 | addw.w vf12, vf12, vf01 352 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); bc = true; - // nop | nop 353 - - if (bc) { goto L25; } - - L30: - // BRANCH! - // b L27 | addw.w vf13, vf13, vf01 354 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); bc = true; - // nop | nop 355 - - if (bc) { goto L27; } - - L31: - // BRANCH! - // b L21 | addw.w vf14, vf14, vf01 356 - vu.vf14.add(Mask::w, vu.vf14, vu.vf01.w()); bc = true; - // nop | nop 357 - - if (bc) { goto L21; } - - L32: - // BRANCH! - // b L23 | addw.w vf15, vf15, vf01 358 - vu.vf15.add(Mask::w, vu.vf15, vu.vf01.w()); bc = true; - // nop | nop 359 - - if (bc) { goto L23; } - - L33: + L33: // R // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 360 vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ // iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 361 @@ -1082,7 +430,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // iaddiu vi13, vi00, 0x345 | nop 364 vu.vi13 = 0x345; /* 837 */ - L34: + L34: // R // iaddi vi03, vi13, 0x7 | nop 365 vu.vi03 = vu.vi13 + 7; // ilw.w vi01, 5(vi13) | nop 366 @@ -1092,23 +440,23 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // iaddi vi10, vi12, 0x9 | subw.w vf18, vf00, vf00 368 vu.vf18.sub(Mask::w, vu.vf00, vu.vf00.w()); vu.vi10 = vu.vi12 + 9; // lq.xyzw vf08, 0(vi13) | subw.w vf19, vf00, vf00 369 - vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf08, vu.vi13); + 
vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat0, vu.vi13); // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 370 - vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf09, vu.vi13 + 1); + vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 371 vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf10, vu.vi13 + 2); // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 372 - vu.vf22.ftoi12(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); + vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 373 - vu.vf23.ftoi12(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; + vu.vf23.ftoi12_check(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 374 vu.vf16.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; // iaddi vi11, vi00, -0x2 | nop 375 vu.vi11 = -2; // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 376 - vu.vf24.ftoi12(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; + vu.vf24.ftoi12_check(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; // isw.w vi12, 906(vi00) | ftoi12.z vf25, vf25 377 - vu.vf25.ftoi12(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); + vu.vf25.ftoi12_check(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); // iaddi vi14, vi14, 0x9 | nop 378 vu.vi14 = vu.vi14 + 9; // lq.xy vf22, 0(vi10) | nop 379 @@ -1122,9 +470,9 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | mulaw.xyzw ACC, vf11, vf00 383 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); // mtir vi02, vf22.x | maddax.xyzw ACC, vf08, vf16 384 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 385 - vu.acc.madda(Mask::xyzw, 
vu.vf09, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | maddz.xyzw vf12, vf10, vf16 386 vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 387 @@ -1142,15 +490,15 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | itof12.xyz vf18, vf22 393 vu.vf18.itof12(Mask::xyz, vu.vf22); // div Q, vf01.x, vf12.w | mul.xyzw vf26, vf12, vf05 394 - vu.vf26.mul(Mask::xyzw, vu.vf12, vu.vf05); vu.Q = vu.vf01.x() / vu.vf12.w(); + vu.vf26.mul(Mask::xyzw, vu.vf12, gen.hmge_scale); vu.Q = gen.fog.x() / vu.vf12.w(); // nop | nop 395 // nop | mulaw.xyzw ACC, vf11, vf00 396 vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); // mtir vi03, vf23.x | maddax.xyzw ACC, vf08, vf16 397 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi03 = vu.vf23.x_as_u16(); // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 398 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | maddz.xyzw vf13, vf10, vf16 399 vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 400 @@ -1168,25 +516,25 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | itof12.xyz vf19, vf23 406 vu.vf19.itof12(Mask::xyz, vu.vf23); // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 407 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf13.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf13.w(); // nop | add.xyzw vf12, vf12, vf04 408 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); + vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 409 - 
vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 410 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 411 vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 412 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 413 - vu.vf27.mul(Mask::xyzw, vu.vf13, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf27.mul(Mask::xyzw, vu.vf13, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi08, vi04, vi11 | nop 414 vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 415 vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); // nop | maxy.w vf12, vf12, vf01 416 - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); + vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); // nop | clipw.xyz vf26, vf26 417 cf = clip(vu.vf26, vu.vf26.w(), cf); // nop | mul.xyz vf19, vf19, Q 418 @@ -1195,30 +543,30 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi02, vi06, L35 | itof12.xyz vf20, vf24 419 vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 420 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf14.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf14.w(); if (bc) { goto L35; } // nop | addw.w vf12, vf12, vf01 421 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); + vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); L35: // nop | add.xyzw vf13, vf13, vf04 422 - 
vu.vf13.add(Mask::xyzw, vu.vf13, vu.vf04); + vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 423 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 424 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 425 vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 426 - vu.vf13.mini(Mask::w, vu.vf13, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 427 - vu.vf28.mul(Mask::xyzw, vu.vf14, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf28.mul(Mask::xyzw, vu.vf14, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 428 - vu.vf12.ftoi4(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 429 vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 430 - vu.vf13.max(Mask::w, vu.vf13, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); + vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 431 cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); @@ -1230,30 +578,30 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi03, vi07, L36 | itof12.xyz vf21, vf25 433 vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 434 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf15.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf15.w(); if (bc) { goto L36; } // nop | addw.w vf13, vf13, vf01 435 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); + vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); L36: // nop | add.xyzw vf14, vf14, vf04 436 - vu.vf14.add(Mask::xyzw, vu.vf14, vu.vf04); + vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 437 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 438 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 439 vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 440 - vu.vf14.mini(Mask::w, vu.vf14, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 441 - vu.vf29.mul(Mask::xyzw, vu.vf15, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf29.mul(Mask::xyzw, vu.vf15, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 442 - vu.vf13.ftoi4(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & 
vu.vi11; // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 443 vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 444 - vu.vf14.max(Mask::w, vu.vf14, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); + vu.vf14.max(Mask::w, vu.vf14, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L46 | clipw.xyz vf28, vf28 445 cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 == vu.vi10); @@ -1266,35 +614,35 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi04, vi08, L38 | itof12.xyz vf18, vf22 447 vu.vf18.itof12(Mask::xyz, vu.vf22); bc = (vu.vi04 == vu.vi08); // div Q, vf01.x, vf12.w | mulaw.xyzw ACC, vf11, vf00 448 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf12.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf12.w(); if (bc) { goto L38; } // nop | addw.w vf14, vf14, vf01 449 - vu.vf14.add(Mask::w, vu.vf14, vu.vf01.w()); + vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); L38: // fcand vi01, 0x3ffff | add.xyzw vf15, vf15, vf04 450 - vu.vf15.add(Mask::xyzw, vu.vf15, vu.vf04); vu.vi01 = cf & 0x3ffff; + vu.vf15.add(Mask::xyzw, vu.vf15, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L55 | maddax.xyzw ACC, vf08, vf16 451 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); // mtir vi03, vf23.x | madday.xyzw ACC, vf09, vf16 452 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi03 = vu.vf23.x_as_u16(); if (bc) { goto L55; } L39: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf13, vf10, vf16 453 vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | miniz.w vf15, vf15, vf01 454 - vu.vf15.mini(Mask::w, vu.vf15, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.vf15.mini(Mask::w, vu.vf15, gen.fog.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf26, vf12, vf05 455 - vu.vf26.mul(Mask::xyzw, vu.vf12, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf26.mul(Mask::xyzw, vu.vf12, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 456 - vu.vf14.ftoi4(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; + vu.vf14.ftoi4_check(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q 457 vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); // sq.xyzw vf20, -12(vi10) | maxy.w vf15, vf15, vf01 458 - vu.vf15.max(Mask::w, vu.vf15, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); + vu.vf15.max(Mask::w, vu.vf15, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf29, vf29 459 cf = clip(vu.vf29, vu.vf29.w(), cf); bc = (vu.vi14 == vu.vi10); @@ -1306,35 +654,35 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi05, vi09, L40 | itof12.xyz vf19, vf23 461 vu.vf19.itof12(Mask::xyz, vu.vf23); bc = (vu.vi05 == vu.vi09); // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 462 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf13.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf13.w(); if (bc) { goto L40; } // nop | addw.w vf15, vf15, vf01 463 - vu.vf15.add(Mask::w, vu.vf15, vu.vf01.w()); + vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); L40: // fcand vi01, 0x3ffff | add.xyzw vf12, vf12, vf04 464 - vu.vf12.add(Mask::xyzw, vu.vf12, vu.vf04); vu.vi01 = cf & 0x3ffff; + vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! // ibne vi00, vi01, L59 | maddax.xyzw ACC, vf08, vf16 465 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 466 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); if (bc) { goto L59; } L41: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 467 vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 468 - vu.vf12.mini(Mask::w, vu.vf12, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 469 - vu.vf27.mul(Mask::xyzw, vu.vf13, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf27.mul(Mask::xyzw, vu.vf13, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi08, vi04, 
vi11 | ftoi4.xyzw vf15, vf15 470 - vu.vf15.ftoi4(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; + vu.vf15.ftoi4_check(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 471 vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); // sq.xyzw vf21, -12(vi10) | maxy.w vf12, vf12, vf01 472 - vu.vf12.max(Mask::w, vu.vf12, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); + vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L46 | clipw.xyz vf26, vf26 473 cf = clip(vu.vf26, vu.vf26.w(), cf); bc = (vu.vi14 == vu.vi10); @@ -1346,35 +694,35 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi02, vi06, L42 | itof12.xyz vf20, vf24 475 vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 476 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf14.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf14.w(); if (bc) { goto L42; } // nop | addw.w vf12, vf12, vf01 477 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); + vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); L42: // fcand vi01, 0x3ffff | add.xyzw vf13, vf13, vf04 478 - vu.vf13.add(Mask::xyzw, vu.vf13, vu.vf04); vu.vi01 = cf & 0x3ffff; + vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L47 | maddax.xyzw ACC, vf08, vf16 479 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 480 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); if (bc) { goto L47; } L43: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 481 vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 482 - vu.vf13.mini(Mask::w, vu.vf13, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 483 - vu.vf28.mul(Mask::xyzw, vu.vf14, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf28.mul(Mask::xyzw, vu.vf14, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 484 - vu.vf12.ftoi4(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 485 vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 486 - vu.vf13.max(Mask::w, vu.vf13, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); + vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 487 cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); @@ -1386,35 +734,35 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi03, vi07, L44 | itof12.xyz vf21, vf25 489 vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 490 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = vu.vf01.x() / vu.vf15.w(); + vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf15.w(); if (bc) { goto L44; } // nop | addw.w vf13, vf13, vf01 491 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); + vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); L44: // fcand vi01, 0x3ffff | add.xyzw vf14, vf14, vf04 492 - vu.vf14.add(Mask::xyzw, vu.vf14, vu.vf04); vu.vi01 = cf & 0x3ffff; + vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! // ibne vi00, vi01, L51 | maddax.xyzw ACC, vf08, vf16 493 - vu.acc.madda(Mask::xyzw, vu.vf08, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 494 - vu.acc.madda(Mask::xyzw, vu.vf09, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); if (bc) { goto L51; } L45: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 495 vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 496 - vu.vf14.mini(Mask::w, vu.vf14, vu.vf01.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 497 - vu.vf29.mul(Mask::xyzw, vu.vf15, vu.vf05); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf29.mul(Mask::xyzw, vu.vf15, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); // iand vi06, vi02, 
vi11 | ftoi4.xyzw vf13, vf13 498 - vu.vf13.ftoi4(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 499 vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 500 - vu.vf14.max(Mask::w, vu.vf14, vu.vf01.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); + vu.vf14.max(Mask::w, vu.vf14, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); // BRANCH! // ibne vi14, vi10, L37 | clipw.xyz vf28, vf28 501 cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 != vu.vi10); @@ -1435,15 +783,15 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibne vi02, vi06, L43 | nop 505 bc = (vu.vi02 != vu.vi06); // sq.xyzw vf23, 998(vi00) | addw.w vf12, vf12, vf01 506 - vu.vf12.add(Mask::w, vu.vf12, vu.vf01.w()); sq_buffer(Mask::xyzw, vu.vf23, 998); + vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L43; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf28, vf07 507 - vu.vf23.mul(Mask::xyzw, vu.vf28, vu.vf07); sq_buffer(Mask::xyzw, vu.vf24, 999); + vu.vf23.mul(Mask::xyzw, vu.vf28, gen.guard); sq_buffer(Mask::xyzw, vu.vf24, 999); // sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf29, vf07 508 - vu.vf24.mul(Mask::xyzw, vu.vf29, vu.vf07); sq_buffer(Mask::xyzw, vu.vf25, 1000); + vu.vf24.mul(Mask::xyzw, vu.vf29, gen.guard); sq_buffer(Mask::xyzw, vu.vf25, 1000); // isw.x vi01, 1001(vi00) | mul.xyzw vf25, vf26, vf07 509 - vu.vf25.mul(Mask::xyzw, vu.vf26, vu.vf07); isw_buffer(Mask::x, vu.vi01, 1001); + vu.vf25.mul(Mask::xyzw, vu.vf26, gen.guard); isw_buffer(Mask::x, vu.vi01, 1001); // isw.y vi02, 1001(vi00) | nop 510 isw_buffer(Mask::y, vu.vi02, 1001); // isw.z vi03, 1001(vi00) | clipw.xyz vf23, vf23 511 @@ -1480,7 +828,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L48: // div Q, 
vf01.x, vf14.w | nop 524 - vu.Q = vu.vf01.x() / vu.vf14.w(); + vu.Q = gen.fog.x() / vu.vf14.w(); // lq.xyzw vf23, 998(vi00) | nop 525 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 526 @@ -1501,143 +849,22 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S if (bc) { goto L43; } L49: - // mfir.w vf31, vi08 | nop 533 - vu.vf31.mfir(Mask::w, vu.vi08); - // mfir.x vf30, vi09 | nop 534 - vu.vf30.mfir(Mask::x, vu.vi09); - // mfir.y vf30, vi10 | nop 535 - vu.vf30.mfir(Mask::y, vu.vi10); - // mfir.z vf30, vi11 | nop 536 - vu.vf30.mfir(Mask::z, vu.vi11); - // mfir.w vf30, vi12 | nop 537 - vu.vf30.mfir(Mask::w, vu.vi12); - // sq.xyzw vf12, 1004(vi00) | nop 538 - sq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf12, -15(vi10) | nop 539 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -15); - // sq.xyzw vf13, 1005(vi00) | nop 540 - sq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf13, -12(vi10) | nop 541 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -12); - // sq.xyzw vf14, 1006(vi00) | nop 542 - sq_buffer(Mask::xyzw, vu.vf14, 1006); - // div Q, vf00.w, vf12.z | nop 543 - vu.Q = vu.vf00.w() / vu.vf12.z(); - // sq.xyzw vf31, 1002(vi00) | nop 544 - sq_buffer(Mask::xyzw, vu.vf31, 1002); - // sq.xyzw vf30, 1003(vi00) | nop 545 - sq_buffer(Mask::xyzw, vu.vf30, 1003); - // sq.xyzw vf15, 1007(vi00) | nop 546 - sq_buffer(Mask::xyzw, vu.vf15, 1007); - // sq.xyzw vf16, 1008(vi00) | nop 547 - sq_buffer(Mask::xyzw, vu.vf16, 1008); - // lq.xyzw vf03, 4(vi13) | nop 548 - lq_buffer(Mask::xyzw, vu.vf03, vu.vi13 + 4); - // lq.xyzw vf15, -14(vi10) | sub.xw vf31, vf00, vf00 549 - vu.vf31.sub(Mask::xw, vu.vf00, vu.vf00); lq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -14); - // div Q, vf00.w, vf13.z | nop 550 - vu.Q = vu.vf00.w() / vu.vf13.z(); - // lq.xyzw vf16, -11(vi10) | mul.xyz vf12, vf12, Q 551 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); lq_buffer(Mask::xyzw, vu.vf16, vu.vi10 + -11); - // lq.xyzw vf17, -8(vi10) | addx.y vf31, vf00, vf03 552 - 
vu.vf31.add(Mask::y, vu.vf00, vu.vf03.x()); lq_buffer(Mask::xyzw, vu.vf17, vu.vi10 + -8); - // nop | itof0.xyzw vf15, vf15 553 - vu.vf15.itof0(Mask::xyzw, vu.vf15); - // sq.xyzw vf28, 989(vi00) | nop 554 - sq_buffer(Mask::xyzw, vu.vf28, 989); - // sq.xyzw vf12, 991(vi00) | itof0.xyzw vf16, vf16 555 - vu.vf16.itof0(Mask::xyzw, vu.vf16); sq_buffer(Mask::xyzw, vu.vf12, 991); - // sq.xyzw vf29, 992(vi00) | itof0.xyzw vf17, vf17 556 - vu.vf17.itof0(Mask::xyzw, vu.vf17); sq_buffer(Mask::xyzw, vu.vf29, 992); - // div Q, vf00.w, vf18.z | nop 557 - vu.Q = vu.vf00.w() / vu.vf18.z(); - // nop | mul.xyz vf13, vf13, Q 558 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); - // sq.xyzw vf26, 995(vi00) | nop 559 - sq_buffer(Mask::xyzw, vu.vf26, 995); - // move.z vf31, vf03 | nop 560 - vu.vf31.move(Mask::z, vu.vf03); - // sq.xyzw vf15, 990(vi00) | nop 561 - sq_buffer(Mask::xyzw, vu.vf15, 990); - // sq.xyzw vf16, 993(vi00) | nop 562 - sq_buffer(Mask::xyzw, vu.vf16, 993); - // sq.xyzw vf17, 996(vi00) | nop 563 - sq_buffer(Mask::xyzw, vu.vf17, 996); - // sq.xyzw vf13, 994(vi00) | nop 564 - sq_buffer(Mask::xyzw, vu.vf13, 994); - // sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf18, Q 565 - vu.vf14.mul(Mask::xyz, vu.vf18, vu.Q); sq_buffer(Mask::xyzw, vu.vf31, 961); - // nop | nop 566 - - // nop | nop 567 - - // BRANCH! - // bal vi15, L66 | nop 568 ASSERT(false); - // sq.xyzw vf14, 997(vi00) | nop 569 - sq_buffer(Mask::xyzw, vu.vf14, 997); - if (bc) { goto L66; } - - // BRANCH! - // ibeq vi00, vi05, L50 | nop 570 - bc = (vu.vi05 == 0); - // nop | nop 571 - - if (bc) { goto L50; } - - // BRANCH! 
- // bal vi15, L63 | nop 572 - ASSERT(false); - // nop | nop 573 - - if (bc) { goto L63; } - - L50: - // ilw.x vi05, 1002(vi00) | nop 574 - ilw_buffer(Mask::x, vu.vi05, 1002); - // ilw.y vi06, 1002(vi00) | nop 575 - ilw_buffer(Mask::y, vu.vi06, 1002); - // ilw.z vi07, 1002(vi00) | nop 576 - ilw_buffer(Mask::z, vu.vi07, 1002); - // ilw.w vi08, 1002(vi00) | nop 577 - ilw_buffer(Mask::w, vu.vi08, 1002); - // ilw.x vi09, 1003(vi00) | nop 578 - ilw_buffer(Mask::x, vu.vi09, 1003); - // ilw.y vi10, 1003(vi00) | nop 579 - ilw_buffer(Mask::y, vu.vi10, 1003); - // ilw.z vi11, 1003(vi00) | nop 580 - ilw_buffer(Mask::z, vu.vi11, 1003); - // ilw.w vi12, 1003(vi00) | nop 581 - ilw_buffer(Mask::w, vu.vi12, 1003); - // lq.xyzw vf12, 1004(vi00) | nop 582 - lq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf13, 1005(vi00) | nop 583 - lq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf14, 1006(vi00) | nop 584 - lq_buffer(Mask::xyzw, vu.vf14, 1006); - // lq.xyzw vf15, 1007(vi00) | nop 585 - lq_buffer(Mask::xyzw, vu.vf15, 1007); - // BRANCH! - // b L48 | nop 586 - bc = true; - // lq.xyzw vf16, 1008(vi00) | nop 587 - lq_buffer(Mask::xyzw, vu.vf16, 1008); - if (bc) { goto L48; } L51: // BRANCH! 
// ibne vi03, vi07, L45 | nop 588 bc = (vu.vi03 != vu.vi07); // sq.xyzw vf23, 998(vi00) | addw.w vf13, vf13, vf01 589 - vu.vf13.add(Mask::w, vu.vf13, vu.vf01.w()); sq_buffer(Mask::xyzw, vu.vf23, 998); + vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L45; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf29, vf07 590 - vu.vf23.mul(Mask::xyzw, vu.vf29, vu.vf07); sq_buffer(Mask::xyzw, vu.vf24, 999); + vu.vf23.mul(Mask::xyzw, vu.vf29, gen.guard); sq_buffer(Mask::xyzw, vu.vf24, 999); // sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf26, vf07 591 - vu.vf24.mul(Mask::xyzw, vu.vf26, vu.vf07); sq_buffer(Mask::xyzw, vu.vf25, 1000); + vu.vf24.mul(Mask::xyzw, vu.vf26, gen.guard); sq_buffer(Mask::xyzw, vu.vf25, 1000); // isw.x vi01, 1001(vi00) | mul.xyzw vf25, vf27, vf07 592 - vu.vf25.mul(Mask::xyzw, vu.vf27, vu.vf07); isw_buffer(Mask::x, vu.vi01, 1001); + vu.vf25.mul(Mask::xyzw, vu.vf27, gen.guard); isw_buffer(Mask::x, vu.vi01, 1001); // isw.y vi02, 1001(vi00) | nop 593 isw_buffer(Mask::y, vu.vi02, 1001); // TODO more clipping pipeline? 
@@ -1673,7 +900,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L52: // div Q, vf01.x, vf15.w | nop 607 - vu.Q = vu.vf01.x() / vu.vf15.w(); + vu.Q = gen.fog.x() / vu.vf15.w(); // lq.xyzw vf23, 998(vi00) | nop 608 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 609 @@ -1694,143 +921,22 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S if (bc) { goto L45; } L53: - // mfir.w vf31, vi08 | nop 616 - vu.vf31.mfir(Mask::w, vu.vi08); - // mfir.x vf30, vi09 | nop 617 - vu.vf30.mfir(Mask::x, vu.vi09); - // mfir.y vf30, vi10 | nop 618 - vu.vf30.mfir(Mask::y, vu.vi10); - // mfir.z vf30, vi11 | nop 619 - vu.vf30.mfir(Mask::z, vu.vi11); - // mfir.w vf30, vi12 | nop 620 - vu.vf30.mfir(Mask::w, vu.vi12); - // sq.xyzw vf12, 1004(vi00) | nop 621 - sq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf12, -15(vi10) | nop 622 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -15); - // sq.xyzw vf13, 1005(vi00) | nop 623 - sq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf13, -12(vi10) | nop 624 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -12); - // sq.xyzw vf14, 1006(vi00) | nop 625 - sq_buffer(Mask::xyzw, vu.vf14, 1006); - // div Q, vf00.w, vf12.z | nop 626 - vu.Q = vu.vf00.w() / vu.vf12.z(); - // sq.xyzw vf31, 1002(vi00) | nop 627 - sq_buffer(Mask::xyzw, vu.vf31, 1002); - // sq.xyzw vf30, 1003(vi00) | nop 628 - sq_buffer(Mask::xyzw, vu.vf30, 1003); - // sq.xyzw vf15, 1007(vi00) | nop 629 - sq_buffer(Mask::xyzw, vu.vf15, 1007); - // sq.xyzw vf16, 1008(vi00) | nop 630 - sq_buffer(Mask::xyzw, vu.vf16, 1008); - // lq.xyzw vf03, 4(vi13) | nop 631 - lq_buffer(Mask::xyzw, vu.vf03, vu.vi13 + 4); - // lq.xyzw vf15, -14(vi10) | sub.xw vf31, vf00, vf00 632 - vu.vf31.sub(Mask::xw, vu.vf00, vu.vf00); lq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -14); - // div Q, vf00.w, vf13.z | nop 633 - vu.Q = vu.vf00.w() / vu.vf13.z(); - // lq.xyzw vf16, -11(vi10) | mul.xyz vf12, vf12, Q 634 - vu.vf12.mul(Mask::xyz, vu.vf12, 
vu.Q); lq_buffer(Mask::xyzw, vu.vf16, vu.vi10 + -11); - // lq.xyzw vf17, -8(vi10) | addx.y vf31, vf00, vf03 635 - vu.vf31.add(Mask::y, vu.vf00, vu.vf03.x()); lq_buffer(Mask::xyzw, vu.vf17, vu.vi10 + -8); - // nop | itof0.xyzw vf15, vf15 636 - vu.vf15.itof0(Mask::xyzw, vu.vf15); - // sq.xyzw vf29, 989(vi00) | nop 637 - sq_buffer(Mask::xyzw, vu.vf29, 989); - // sq.xyzw vf12, 991(vi00) | itof0.xyzw vf16, vf16 638 - vu.vf16.itof0(Mask::xyzw, vu.vf16); sq_buffer(Mask::xyzw, vu.vf12, 991); - // sq.xyzw vf26, 992(vi00) | itof0.xyzw vf17, vf17 639 - vu.vf17.itof0(Mask::xyzw, vu.vf17); sq_buffer(Mask::xyzw, vu.vf26, 992); - // div Q, vf00.w, vf19.z | nop 640 - vu.Q = vu.vf00.w() / vu.vf19.z(); - // nop | mul.xyz vf13, vf13, Q 641 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); - // sq.xyzw vf27, 995(vi00) | nop 642 - sq_buffer(Mask::xyzw, vu.vf27, 995); - // move.z vf31, vf03 | nop 643 - vu.vf31.move(Mask::z, vu.vf03); - // sq.xyzw vf15, 990(vi00) | nop 644 - sq_buffer(Mask::xyzw, vu.vf15, 990); - // sq.xyzw vf16, 993(vi00) | nop 645 - sq_buffer(Mask::xyzw, vu.vf16, 993); - // sq.xyzw vf17, 996(vi00) | nop 646 - sq_buffer(Mask::xyzw, vu.vf17, 996); - // sq.xyzw vf13, 994(vi00) | nop 647 - sq_buffer(Mask::xyzw, vu.vf13, 994); - // sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf19, Q 648 - vu.vf14.mul(Mask::xyz, vu.vf19, vu.Q); sq_buffer(Mask::xyzw, vu.vf31, 961); - // nop | nop 649 - - // nop | nop 650 - - // BRANCH! - // bal vi15, L66 | nop 651 ASSERT(false); - // sq.xyzw vf14, 997(vi00) | nop 652 - sq_buffer(Mask::xyzw, vu.vf14, 997); - if (bc) { goto L66; } - - // BRANCH! - // ibeq vi00, vi05, L54 | nop 653 - bc = (vu.vi05 == 0); - // nop | nop 654 - - if (bc) { goto L54; } - - // BRANCH! 
- // bal vi15, L63 | nop 655 - ASSERT(false); - // nop | nop 656 - - if (bc) { goto L63; } - - L54: - // ilw.x vi05, 1002(vi00) | nop 657 - ilw_buffer(Mask::x, vu.vi05, 1002); - // ilw.y vi06, 1002(vi00) | nop 658 - ilw_buffer(Mask::y, vu.vi06, 1002); - // ilw.z vi07, 1002(vi00) | nop 659 - ilw_buffer(Mask::z, vu.vi07, 1002); - // ilw.w vi08, 1002(vi00) | nop 660 - ilw_buffer(Mask::w, vu.vi08, 1002); - // ilw.x vi09, 1003(vi00) | nop 661 - ilw_buffer(Mask::x, vu.vi09, 1003); - // ilw.y vi10, 1003(vi00) | nop 662 - ilw_buffer(Mask::y, vu.vi10, 1003); - // ilw.z vi11, 1003(vi00) | nop 663 - ilw_buffer(Mask::z, vu.vi11, 1003); - // ilw.w vi12, 1003(vi00) | nop 664 - ilw_buffer(Mask::w, vu.vi12, 1003); - // lq.xyzw vf12, 1004(vi00) | nop 665 - lq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf13, 1005(vi00) | nop 666 - lq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf14, 1006(vi00) | nop 667 - lq_buffer(Mask::xyzw, vu.vf14, 1006); - // lq.xyzw vf15, 1007(vi00) | nop 668 - lq_buffer(Mask::xyzw, vu.vf15, 1007); - // BRANCH! - // b L52 | nop 669 - bc = true; - // lq.xyzw vf16, 1008(vi00) | nop 670 - lq_buffer(Mask::xyzw, vu.vf16, 1008); - if (bc) { goto L52; } L55: // BRANCH! 
// ibne vi04, vi08, L39 | nop 671 bc = (vu.vi04 != vu.vi08); // sq.xyzw vf23, 998(vi00) | addw.w vf14, vf14, vf01 672 - vu.vf14.add(Mask::w, vu.vf14, vu.vf01.w()); sq_buffer(Mask::xyzw, vu.vf23, 998); + vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L39; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf26, vf07 673 - vu.vf23.mul(Mask::xyzw, vu.vf26, vu.vf07); sq_buffer(Mask::xyzw, vu.vf24, 999); + vu.vf23.mul(Mask::xyzw, vu.vf26, gen.guard); sq_buffer(Mask::xyzw, vu.vf24, 999); // sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf27, vf07 674 - vu.vf24.mul(Mask::xyzw, vu.vf27, vu.vf07); sq_buffer(Mask::xyzw, vu.vf25, 1000); + vu.vf24.mul(Mask::xyzw, vu.vf27, gen.guard); sq_buffer(Mask::xyzw, vu.vf25, 1000); // isw.x vi01, 1001(vi00) | mul.xyzw vf25, vf28, vf07 675 - vu.vf25.mul(Mask::xyzw, vu.vf28, vu.vf07); isw_buffer(Mask::x, vu.vi01, 1001); + vu.vf25.mul(Mask::xyzw, vu.vf28, gen.guard); isw_buffer(Mask::x, vu.vi01, 1001); // isw.y vi02, 1001(vi00) | nop 676 isw_buffer(Mask::y, vu.vi02, 1001); // TODO more clipping? 
@@ -1866,7 +972,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L56: // div Q, vf01.x, vf12.w | nop 690 - vu.Q = vu.vf01.x() / vu.vf12.w(); + vu.Q = gen.fog.x() / vu.vf12.w(); // lq.xyzw vf23, 998(vi00) | nop 691 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 692 @@ -1887,143 +993,22 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S if (bc) { goto L39; } L57: - // mfir.w vf31, vi08 | nop 699 - vu.vf31.mfir(Mask::w, vu.vi08); - // mfir.x vf30, vi09 | nop 700 - vu.vf30.mfir(Mask::x, vu.vi09); - // mfir.y vf30, vi10 | nop 701 - vu.vf30.mfir(Mask::y, vu.vi10); - // mfir.z vf30, vi11 | nop 702 - vu.vf30.mfir(Mask::z, vu.vi11); - // mfir.w vf30, vi12 | nop 703 - vu.vf30.mfir(Mask::w, vu.vi12); - // sq.xyzw vf12, 1004(vi00) | nop 704 - sq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf12, -15(vi10) | nop 705 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -15); - // sq.xyzw vf13, 1005(vi00) | nop 706 - sq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf13, -12(vi10) | nop 707 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -12); - // sq.xyzw vf14, 1006(vi00) | nop 708 - sq_buffer(Mask::xyzw, vu.vf14, 1006); - // div Q, vf00.w, vf12.z | nop 709 - vu.Q = vu.vf00.w() / vu.vf12.z(); - // sq.xyzw vf31, 1002(vi00) | nop 710 - sq_buffer(Mask::xyzw, vu.vf31, 1002); - // sq.xyzw vf30, 1003(vi00) | nop 711 - sq_buffer(Mask::xyzw, vu.vf30, 1003); - // sq.xyzw vf15, 1007(vi00) | nop 712 - sq_buffer(Mask::xyzw, vu.vf15, 1007); - // sq.xyzw vf16, 1008(vi00) | nop 713 - sq_buffer(Mask::xyzw, vu.vf16, 1008); - // lq.xyzw vf03, 4(vi13) | nop 714 - lq_buffer(Mask::xyzw, vu.vf03, vu.vi13 + 4); - // lq.xyzw vf15, -14(vi10) | sub.xw vf31, vf00, vf00 715 - vu.vf31.sub(Mask::xw, vu.vf00, vu.vf00); lq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -14); - // div Q, vf00.w, vf13.z | nop 716 - vu.Q = vu.vf00.w() / vu.vf13.z(); - // lq.xyzw vf16, -11(vi10) | mul.xyz vf12, vf12, Q 717 - vu.vf12.mul(Mask::xyz, vu.vf12, 
vu.Q); lq_buffer(Mask::xyzw, vu.vf16, vu.vi10 + -11); - // lq.xyzw vf17, -8(vi10) | addx.y vf31, vf00, vf03 718 - vu.vf31.add(Mask::y, vu.vf00, vu.vf03.x()); lq_buffer(Mask::xyzw, vu.vf17, vu.vi10 + -8); - // nop | itof0.xyzw vf15, vf15 719 - vu.vf15.itof0(Mask::xyzw, vu.vf15); - // sq.xyzw vf26, 989(vi00) | nop 720 - sq_buffer(Mask::xyzw, vu.vf26, 989); - // sq.xyzw vf12, 991(vi00) | itof0.xyzw vf16, vf16 721 - vu.vf16.itof0(Mask::xyzw, vu.vf16); sq_buffer(Mask::xyzw, vu.vf12, 991); - // sq.xyzw vf27, 992(vi00) | itof0.xyzw vf17, vf17 722 - vu.vf17.itof0(Mask::xyzw, vu.vf17); sq_buffer(Mask::xyzw, vu.vf27, 992); - // div Q, vf00.w, vf20.z | nop 723 - vu.Q = vu.vf00.w() / vu.vf20.z(); - // nop | mul.xyz vf13, vf13, Q 724 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); - // sq.xyzw vf28, 995(vi00) | nop 725 - sq_buffer(Mask::xyzw, vu.vf28, 995); - // move.z vf31, vf03 | nop 726 - vu.vf31.move(Mask::z, vu.vf03); - // sq.xyzw vf15, 990(vi00) | nop 727 - sq_buffer(Mask::xyzw, vu.vf15, 990); - // sq.xyzw vf16, 993(vi00) | nop 728 - sq_buffer(Mask::xyzw, vu.vf16, 993); - // sq.xyzw vf17, 996(vi00) | nop 729 - sq_buffer(Mask::xyzw, vu.vf17, 996); - // sq.xyzw vf13, 994(vi00) | nop 730 - sq_buffer(Mask::xyzw, vu.vf13, 994); - // sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf20, Q 731 - vu.vf14.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, vu.vf31, 961); - // nop | nop 732 - - // nop | nop 733 - - // BRANCH! - // bal vi15, L66 | nop 734 ASSERT(false); - // sq.xyzw vf14, 997(vi00) | nop 735 - sq_buffer(Mask::xyzw, vu.vf14, 997); - if (bc) { goto L66; } - - // BRANCH! - // ibeq vi00, vi05, L58 | nop 736 - bc = (vu.vi05 == 0); - // nop | nop 737 - - if (bc) { goto L58; } - - // BRANCH! 
- // bal vi15, L63 | nop 738 - ASSERT(false); - // nop | nop 739 - - if (bc) { goto L63; } - - L58: - // ilw.x vi05, 1002(vi00) | nop 740 - ilw_buffer(Mask::x, vu.vi05, 1002); - // ilw.y vi06, 1002(vi00) | nop 741 - ilw_buffer(Mask::y, vu.vi06, 1002); - // ilw.z vi07, 1002(vi00) | nop 742 - ilw_buffer(Mask::z, vu.vi07, 1002); - // ilw.w vi08, 1002(vi00) | nop 743 - ilw_buffer(Mask::w, vu.vi08, 1002); - // ilw.x vi09, 1003(vi00) | nop 744 - ilw_buffer(Mask::x, vu.vi09, 1003); - // ilw.y vi10, 1003(vi00) | nop 745 - ilw_buffer(Mask::y, vu.vi10, 1003); - // ilw.z vi11, 1003(vi00) | nop 746 - ilw_buffer(Mask::z, vu.vi11, 1003); - // ilw.w vi12, 1003(vi00) | nop 747 - ilw_buffer(Mask::w, vu.vi12, 1003); - // lq.xyzw vf12, 1004(vi00) | nop 748 - lq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf13, 1005(vi00) | nop 749 - lq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf14, 1006(vi00) | nop 750 - lq_buffer(Mask::xyzw, vu.vf14, 1006); - // lq.xyzw vf15, 1007(vi00) | nop 751 - lq_buffer(Mask::xyzw, vu.vf15, 1007); - // BRANCH! - // b L56 | nop 752 - bc = true; - // lq.xyzw vf16, 1008(vi00) | nop 753 - lq_buffer(Mask::xyzw, vu.vf16, 1008); - if (bc) { goto L56; } L59: // BRANCH! 
// ibne vi05, vi09, L41 | nop 754 bc = (vu.vi05 != vu.vi09); // sq.xyzw vf23, 998(vi00) | addw.w vf15, vf15, vf01 755 - vu.vf15.add(Mask::w, vu.vf15, vu.vf01.w()); sq_buffer(Mask::xyzw, vu.vf23, 998); + vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L41; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf27, vf07 756 - vu.vf23.mul(Mask::xyzw, vu.vf27, vu.vf07); sq_buffer(Mask::xyzw, vu.vf24, 999); + vu.vf23.mul(Mask::xyzw, vu.vf27, gen.guard); sq_buffer(Mask::xyzw, vu.vf24, 999); // sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf28, vf07 757 - vu.vf24.mul(Mask::xyzw, vu.vf28, vu.vf07); sq_buffer(Mask::xyzw, vu.vf25, 1000); + vu.vf24.mul(Mask::xyzw, vu.vf28, gen.guard); sq_buffer(Mask::xyzw, vu.vf25, 1000); // isw.x vi01, 1001(vi00) | mul.xyzw vf25, vf29, vf07 758 - vu.vf25.mul(Mask::xyzw, vu.vf29, vu.vf07); isw_buffer(Mask::x, vu.vi01, 1001); + vu.vf25.mul(Mask::xyzw, vu.vf29, gen.guard); isw_buffer(Mask::x, vu.vi01, 1001); // isw.y vi02, 1001(vi00) | nop 759 isw_buffer(Mask::y, vu.vi02, 1001); // isw.z vi03, 1001(vi00) | clipw.xyz vf23, vf23 760 @@ -2059,7 +1044,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L60: // div Q, vf01.x, vf13.w | nop 773 - vu.Q = vu.vf01.x() / vu.vf13.w(); + vu.Q = gen.fog.x() / vu.vf13.w(); // lq.xyzw vf23, 998(vi00) | nop 774 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 775 @@ -2080,691 +1065,10 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S if (bc) { goto L41; } L61: - // mfir.w vf31, vi08 | nop 782 - vu.vf31.mfir(Mask::w, vu.vi08); - // mfir.x vf30, vi09 | nop 783 - vu.vf30.mfir(Mask::x, vu.vi09); - // mfir.y vf30, vi10 | nop 784 - vu.vf30.mfir(Mask::y, vu.vi10); - // mfir.z vf30, vi11 | nop 785 - vu.vf30.mfir(Mask::z, vu.vi11); - // mfir.w vf30, vi12 | nop 786 - vu.vf30.mfir(Mask::w, vu.vi12); - // sq.xyzw vf12, 1004(vi00) | nop 787 - sq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw 
vf12, -15(vi10) | nop 788 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -15); - // sq.xyzw vf13, 1005(vi00) | nop 789 - sq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf13, -12(vi10) | nop 790 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -12); - // sq.xyzw vf14, 1006(vi00) | nop 791 - sq_buffer(Mask::xyzw, vu.vf14, 1006); - // div Q, vf00.w, vf12.z | nop 792 - vu.Q = vu.vf00.w() / vu.vf12.z(); - // sq.xyzw vf31, 1002(vi00) | nop 793 - sq_buffer(Mask::xyzw, vu.vf31, 1002); - // sq.xyzw vf30, 1003(vi00) | nop 794 - sq_buffer(Mask::xyzw, vu.vf30, 1003); - // sq.xyzw vf15, 1007(vi00) | nop 795 - sq_buffer(Mask::xyzw, vu.vf15, 1007); - // sq.xyzw vf16, 1008(vi00) | nop 796 - sq_buffer(Mask::xyzw, vu.vf16, 1008); - // lq.xyzw vf03, 4(vi13) | nop 797 - lq_buffer(Mask::xyzw, vu.vf03, vu.vi13 + 4); - // lq.xyzw vf15, -14(vi10) | sub.xw vf31, vf00, vf00 798 - vu.vf31.sub(Mask::xw, vu.vf00, vu.vf00); lq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -14); - // div Q, vf00.w, vf13.z | nop 799 - vu.Q = vu.vf00.w() / vu.vf13.z(); - // lq.xyzw vf16, -11(vi10) | mul.xyz vf12, vf12, Q 800 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); lq_buffer(Mask::xyzw, vu.vf16, vu.vi10 + -11); - // lq.xyzw vf17, -8(vi10) | addx.y vf31, vf00, vf03 801 - vu.vf31.add(Mask::y, vu.vf00, vu.vf03.x()); lq_buffer(Mask::xyzw, vu.vf17, vu.vi10 + -8); - // nop | itof0.xyzw vf15, vf15 802 - vu.vf15.itof0(Mask::xyzw, vu.vf15); - // sq.xyzw vf27, 989(vi00) | nop 803 - sq_buffer(Mask::xyzw, vu.vf27, 989); - // sq.xyzw vf12, 991(vi00) | itof0.xyzw vf16, vf16 804 - vu.vf16.itof0(Mask::xyzw, vu.vf16); sq_buffer(Mask::xyzw, vu.vf12, 991); - // sq.xyzw vf28, 992(vi00) | itof0.xyzw vf17, vf17 805 - vu.vf17.itof0(Mask::xyzw, vu.vf17); sq_buffer(Mask::xyzw, vu.vf28, 992); - // div Q, vf00.w, vf21.z | nop 806 - vu.Q = vu.vf00.w() / vu.vf21.z(); - // nop | mul.xyz vf13, vf13, Q 807 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); - // sq.xyzw vf29, 995(vi00) | nop 808 - sq_buffer(Mask::xyzw, vu.vf29, 995); - // move.z vf31, vf03 | nop 809 - 
vu.vf31.move(Mask::z, vu.vf03); - // sq.xyzw vf15, 990(vi00) | nop 810 - sq_buffer(Mask::xyzw, vu.vf15, 990); - // sq.xyzw vf16, 993(vi00) | nop 811 - sq_buffer(Mask::xyzw, vu.vf16, 993); - // sq.xyzw vf17, 996(vi00) | nop 812 - sq_buffer(Mask::xyzw, vu.vf17, 996); - // sq.xyzw vf13, 994(vi00) | nop 813 - sq_buffer(Mask::xyzw, vu.vf13, 994); - // sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf21, Q 814 - vu.vf14.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, vu.vf31, 961); - // nop | nop 815 - - // nop | nop 816 - - // BRANCH! - // bal vi15, L66 | nop 817 - ASSERT(false); - // sq.xyzw vf14, 997(vi00) | nop 818 - sq_buffer(Mask::xyzw, vu.vf14, 997); - if (bc) { goto L66; } - - // BRANCH! - // ibeq vi00, vi05, L62 | nop 819 - bc = (vu.vi05 == 0); - // nop | nop 820 - - if (bc) { goto L62; } - - // BRANCH! - // bal vi15, L63 | nop 821 - ASSERT(false); - // nop | nop 822 - - if (bc) { goto L63; } - - L62: - // ilw.x vi05, 1002(vi00) | nop 823 - ilw_buffer(Mask::x, vu.vi05, 1002); - // ilw.y vi06, 1002(vi00) | nop 824 - ilw_buffer(Mask::y, vu.vi06, 1002); - // ilw.z vi07, 1002(vi00) | nop 825 - ilw_buffer(Mask::z, vu.vi07, 1002); - // ilw.w vi08, 1002(vi00) | nop 826 - ilw_buffer(Mask::w, vu.vi08, 1002); - // ilw.x vi09, 1003(vi00) | nop 827 - ilw_buffer(Mask::x, vu.vi09, 1003); - // ilw.y vi10, 1003(vi00) | nop 828 - ilw_buffer(Mask::y, vu.vi10, 1003); - // ilw.z vi11, 1003(vi00) | nop 829 - ilw_buffer(Mask::z, vu.vi11, 1003); - // ilw.w vi12, 1003(vi00) | nop 830 - ilw_buffer(Mask::w, vu.vi12, 1003); - // lq.xyzw vf12, 1004(vi00) | nop 831 - lq_buffer(Mask::xyzw, vu.vf12, 1004); - // lq.xyzw vf13, 1005(vi00) | nop 832 - lq_buffer(Mask::xyzw, vu.vf13, 1005); - // lq.xyzw vf14, 1006(vi00) | nop 833 - lq_buffer(Mask::xyzw, vu.vf14, 1006); - // lq.xyzw vf15, 1007(vi00) | nop 834 - lq_buffer(Mask::xyzw, vu.vf15, 1007); - // BRANCH! 
- // b L60 | nop 835 - bc = true; - // lq.xyzw vf16, 1008(vi00) | nop 836 - lq_buffer(Mask::xyzw, vu.vf16, 1008); - if (bc) { goto L60; } - - L63: - // ilw.w vi01, 8(vi13) | nop 837 - ilw_buffer(Mask::w, vu.vi01, vu.vi13 + 8); - // ilw.y vi02, 1003(vi00) | nop 838 - ilw_buffer(Mask::y, vu.vi02, 1003); - // iaddi vi03, vi13, 0x7 | nop 839 - vu.vi03 = vu.vi13 + 7; - // BRANCH! - // ibltz vi01, L65 | nop 840 - bc = ((s16)vu.vi01) < 0; - // ilw.w vi04, 906(vi00) | nop 841 - ilw_buffer(Mask::w, vu.vi04, 906); - if (bc) { goto L65; } - - // iaddi vi02, vi02, -0xf | nop 842 - vu.vi02 = vu.vi02 + -15; - // isub vi02, vi02, vi04 | nop 843 - vu.vi02 = vu.vi02 - vu.vi04; - L64: - // ilw.w vi04, 5(vi03) | nop 844 - ilw_buffer(Mask::w, vu.vi04, vu.vi03 + 5); - // ilw.w vi01, 6(vi03) | nop 845 - ilw_buffer(Mask::w, vu.vi01, vu.vi03 + 6); - // nop | nop 846 - - // nop | nop 847 - - // isub vi04, vi02, vi04 | nop 848 - vu.vi04 = vu.vi02 - vu.vi04; - // nop | nop 849 - - // BRANCH! - // ibltz vi04, L65 | nop 850 - bc = ((s16)vu.vi04) < 0; - // nop | nop 851 - - if (bc) { goto L65; } - - // BRANCH! 
- // ibgtz vi01, L64 | nop 852 - bc = ((s16)vu.vi01) > 0; - // iaddi vi03, vi03, 0x5 | nop 853 - vu.vi03 = vu.vi03 + 5; - if (bc) { goto L64; } - - L65: - // iaddiu vi01, vi00, 0x3b9 | nop 854 - vu.vi01 = 0x3b9; /* 953 */ - // lq.xyzw vf12, 0(vi03) | nop 855 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi03); - // lq.xyzw vf13, 1(vi03) | nop 856 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi03 + 1); - // lq.xyzw vf14, 2(vi03) | nop 857 - lq_buffer(Mask::xyzw, vu.vf14, vu.vi03 + 2); - // lq.xyzw vf15, 3(vi03) | nop 858 - lq_buffer(Mask::xyzw, vu.vf15, vu.vi03 + 3); - // sq.xyzw vf02, 0(vi01) | nop 859 - sq_buffer(Mask::xyzw, vu.vf02, vu.vi01); - // sq.xyzw vf12, 1(vi01) | nop 860 - sq_buffer(Mask::xyzw, vu.vf12, vu.vi01 + 1); - // sq.xyzw vf13, 2(vi01) | nop 861 - sq_buffer(Mask::xyzw, vu.vf13, vu.vi01 + 2); - // sq.xyzw vf14, 3(vi01) | nop 862 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi01 + 3); - // sq.xyzw vf15, 4(vi01) | nop 863 - sq_buffer(Mask::xyzw, vu.vf15, vu.vi01 + 4); - // lq.xyzw vf12, 4(vi03) | nop 864 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi03 + 4); - // lq.xyzw vf13, 5(vi13) | nop 865 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi13 + 5); - // lq.xyzw vf14, 6(vi13) | nop 866 - lq_buffer(Mask::xyzw, vu.vf14, vu.vi13 + 6); - // sq.xyzw vf12, 5(vi01) | nop 867 - sq_buffer(Mask::xyzw, vu.vf12, vu.vi01 + 5); - // sq.xyzw vf13, 6(vi01) | nop 868 - sq_buffer(Mask::xyzw, vu.vf13, vu.vi01 + 6); - // sq.xyzw vf14, 7(vi01) | nop 869 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi01 + 7); - // xgkick vi01 | nop 870 - xgkick(vu.vi01, render_state, prof); - // lq.xyzw vf12, 905(vi00) | nop 871 - lq_buffer(Mask::xyzw, vu.vf12, 905); - // iaddiu vi01, vi00, 0x3f3 | nop 872 - vu.vi01 = 0x3f3; /* 1011 */ - // isubiu vi02, vi00, 0x7fff | nop 873 - vu.vi02 = -32767; - // sq.xyzw vf02, 0(vi01) | nop 874 - sq_buffer(Mask::xyzw, vu.vf02, vu.vi01); - // iswr.x vi02, vi01 | nop 875 - isw_buffer(Mask::x, vu.vi02, vu.vi01); - // sq.xyzw vf12, 1(vi01) | nop 876 - sq_buffer(Mask::xyzw, vu.vf12, vu.vi01 + 1); - // xgkick 
vi01 | nop 877 - xgkick(vu.vi01, render_state, prof); - L66: - // sq.xyzw vf00, 907(vi00) | nop 878 - sq_buffer(Mask::xyzw, vu.vf00, 907); - // sq.xyzw vf00, 914(vi00) | nop 879 - sq_buffer(Mask::xyzw, vu.vf00, 914); - // sq.xyzw vf00, 921(vi00) | nop 880 - sq_buffer(Mask::xyzw, vu.vf00, 921); - // sq.xyzw vf00, 928(vi00) | nop 881 - sq_buffer(Mask::xyzw, vu.vf00, 928); - // sq.xyzw vf00, 935(vi00) | nop 882 - sq_buffer(Mask::xyzw, vu.vf00, 935); - // sq.xyzw vf00, 942(vi00) | nop 883 - sq_buffer(Mask::xyzw, vu.vf00, 942); - // iaddiu vi01, vi00, 0x40f | nop 884 - vu.vi01 = 0x40f; /* 1039 */ - // isw.z vi01, 907(vi00) | nop 885 - isw_buffer(Mask::z, vu.vi01, 907); - // iaddiu vi01, vi00, 0x411 | nop 886 - vu.vi01 = 0x411; /* 1041 */ - // isw.z vi01, 914(vi00) | nop 887 - isw_buffer(Mask::z, vu.vi01, 914); - // iaddiu vi01, vi00, 0x413 | nop 888 - vu.vi01 = 0x413; /* 1043 */ - // isw.z vi01, 921(vi00) | nop 889 - isw_buffer(Mask::z, vu.vi01, 921); - // iaddiu vi01, vi00, 0x415 | nop 890 - vu.vi01 = 0x415; /* 1045 */ - // isw.z vi01, 928(vi00) | nop 891 - isw_buffer(Mask::z, vu.vi01, 928); - // iaddiu vi01, vi00, 0x417 | nop 892 - vu.vi01 = 0x417; /* 1047 */ - // isw.z vi01, 935(vi00) | nop 893 - isw_buffer(Mask::z, vu.vi01, 935); - // iaddiu vi01, vi00, 0x419 | nop 894 - vu.vi01 = 0x419; /* 1049 */ - // isw.z vi01, 942(vi00) | nop 895 - isw_buffer(Mask::z, vu.vi01, 942); - // iaddiu vi03, vi00, 0x3c2 | nop 896 - vu.vi03 = 0x3c2; /* 962 */ - // iaddiu vi04, vi00, 0x3c1 | nop 897 - vu.vi04 = 0x3c1; /* 961 */ - // mfir.x vf31, vi15 | nop 898 - vu.vf31.mfir(Mask::x, vu.vi15); - // iaddi vi05, vi00, 0x0 | nop 899 - vu.vi05 = 0; - // BRANCH! - // bal vi15, L67 | nop 900 - ASSERT(false); - // iaddiu vi07, vi00, 0x3dd | nop 901 - vu.vi07 = 0x3dd; /* 989 */ - if (bc) { goto L67; } - - // BRANCH! - // bal vi15, L67 | nop 902 - ASSERT(false); - // iaddiu vi07, vi00, 0x3e0 | nop 903 - vu.vi07 = 0x3e0; /* 992 */ - if (bc) { goto L67; } - - // BRANCH! 
- // bal vi15, L67 | nop 904 - ASSERT(false); - // iaddiu vi07, vi00, 0x3e3 | nop 905 - vu.vi07 = 0x3e3; /* 995 */ - if (bc) { goto L67; } - - // BRANCH! - // b L76 | nop 906 - bc = true; - // nop | nop 907 - - if (bc) { goto L76; } - - L67: - // iaddiu vi09, vi00, 0x38b | nop 908 - vu.vi09 = 0x38b; /* 907 */ - L68: - // iaddi vi10, vi00, 0x0 | nop 909 - vu.vi10 = 0; - L69: - // isubiu vi01, vi09, 0x3b5 | nop 910 - vu.vi01 = vu.vi09 - 0x3b5; /* 949 */ - // ilwr.y vi08, vi09 | nop 911 - ilw_buffer(Mask::y, vu.vi08, vu.vi09); - // BRANCH! - // ibgez vi01, L73 | nop 912 - bc = ((s16)vu.vi01) >= 0; - // ilwr.z vi06, vi09 | nop 913 - ilw_buffer(Mask::z, vu.vi06, vu.vi09); - if (bc) { goto L73; } - - // lq.xyzw vf24, 0(vi07) | nop 914 - lq_buffer(Mask::xyzw, vu.vf24, vu.vi07); - // lq.xyzw vf23, 0(vi08) | nop 915 - lq_buffer(Mask::xyzw, vu.vf23, vu.vi08); - // BRANCH! - // ibne vi00, vi08, L70 | nop 916 - bc = (vu.vi08 != 0); - // iswr.y vi07, vi09 | nop 917 - isw_buffer(Mask::y, vu.vi07, vu.vi09); - if (bc) { goto L70; } - - // jalr vi11, vi06 | nop 918 - ASSERT(false); - // iswr.x vi07, vi09 | nop 919 - isw_buffer(Mask::x, vu.vi07, vu.vi09); - // nop | nop 920 - - // nop | nop 921 - - // nop | nop 922 - - // fsand vi02, 0x2 | nop 923 - ASSERT(false); - // BRANCH! - // ibne vi00, vi02, L74 | nop 924 - bc = (vu.vi02 != 0); - // nop | nop 925 - - if (bc) { goto L74; } - - // BRANCH! - // b L69 | nop 926 - bc = true; - // iaddi vi09, vi09, 0x7 | nop 927 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L69; } - - L70: - // jalr vi11, vi06 | nop 928 - ASSERT(false); - // lq.xyzw vf15, 1(vi08) | nop 929 - lq_buffer(Mask::xyzw, vu.vf15, vu.vi08 + 1); - // lq.xyzw vf16, 1(vi07) | nop 930 - lq_buffer(Mask::xyzw, vu.vf16, vu.vi07 + 1); - // lq.xyzw vf12, 2(vi08) | nop 931 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi08 + 2); - // fsand vi01, 0x2 | nop 932 ASSERT(false); - // fsand vi02, 0x2 | subw.w vf31, vf30, vf31 933 - vu.vf31.sub(Mask::w, vu.vf30, vu.vf31.w()); ASSERT(false); - // BRANCH! 
- // ibne vi00, vi01, L72 | nop 934 - bc = (vu.vi01 != 0); - // lq.xyzw vf13, 2(vi07) | nop 935 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi07 + 2); - if (bc) { goto L72; } - - // BRANCH! - // ibne vi00, vi02, L71 | nop 936 - bc = (vu.vi02 != 0); - // div Q, vf30.w, vf31.w | nop 937 - vu.Q = vu.vf30.w() / vu.vf31.w(); - if (bc) { goto L71; } - // BRANCH! - // b L69 | nop 938 - bc = true; - // iaddi vi09, vi09, 0x7 | nop 939 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L69; } - L71: - // BRANCH! - // bal vi11, L81 | nop 940 - ASSERT(false); - // iaddi vi07, vi09, 0x1 | nop 941 - vu.vi07 = vu.vi09 + 1; - if (bc) { goto L81; } - - // sq.xyzw vf25, 1(vi09) | nop 942 - sq_buffer(Mask::xyzw, vu.vf25, vu.vi09 + 1); - // sq.xyzw vf17, 2(vi09) | nop 943 - sq_buffer(Mask::xyzw, vu.vf17, vu.vi09 + 2); - // sq.xyzw vf14, 3(vi09) | nop 944 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi09 + 3); - // BRANCH! - // b L69 | nop 945 - bc = true; - // iaddi vi09, vi09, 0x7 | nop 946 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L69; } - - L72: - // BRANCH! - // ibne vi00, vi02, L74 | nop 947 - bc = (vu.vi02 != 0); - // div Q, vf30.w, vf31.w | nop 948 - vu.Q = vu.vf30.w() / vu.vf31.w(); - if (bc) { goto L74; } - - // BRANCH! - // bal vi11, L81 | nop 949 - ASSERT(false); - // nop | nop 950 - - if (bc) { goto L81; } - - // sq.xyzw vf25, 4(vi09) | nop 951 - sq_buffer(Mask::xyzw, vu.vf25, vu.vi09 + 4); - // sq.xyzw vf17, 5(vi09) | nop 952 - sq_buffer(Mask::xyzw, vu.vf17, vu.vi09 + 5); - // sq.xyzw vf14, 6(vi09) | nop 953 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi09 + 6); - // iaddi vi09, vi09, 0x7 | nop 954 - vu.vi09 = vu.vi09 + 7; - // isw.x vi09, 949(vi10) | nop 955 - isw_buffer(Mask::x, vu.vi09, vu.vi10 + 949); - // isw.y vi07, 949(vi10) | nop 956 - isw_buffer(Mask::y, vu.vi07, vu.vi10 + 949); - // iaddi vi10, vi10, 0x1 | nop 957 - vu.vi10 = vu.vi10 + 1; - // BRANCH! 
- // b L69 | nop 958 - bc = true; - // iaddi vi07, vi09, -0x3 | nop 959 - vu.vi07 = vu.vi09 + -3; - if (bc) { goto L69; } - - L73: - // lq.xyzw vf23, 0(vi07) | nop 960 - lq_buffer(Mask::xyzw, vu.vf23, vu.vi07); - // lq.xyzw vf15, 1(vi07) | nop 961 - lq_buffer(Mask::xyzw, vu.vf15, vu.vi07 + 1); - // lq.xyzw vf12, 2(vi07) | nop 962 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi07 + 2); - // iaddi vi05, vi05, 0x1 | nop 963 - vu.vi05 = vu.vi05 + 1; - // nop | nop 964 - - // div Q, vf00.w, vf23.w | nop 965 - vu.Q = vu.vf00.w() / vu.vf23.w(); - // nop | ftoi0.xyzw vf15, vf15 966 - vu.vf15.ftoi0(Mask::xyzw, vu.vf15); - // nop | mul.xyzw vf23, vf23, vf06 967 - vu.vf23.mul(Mask::xyzw, vu.vf23, vu.vf06); - // iaddi vi03, vi03, 0x3 | nop 968 - vu.vi03 = vu.vi03 + 3; - // waitq | nop 969 - ASSERT(false); - // sq.xyzw vf15, -2(vi03) | mul.xyz vf23, vf23, Q 970 - vu.vf23.mul(Mask::xyz, vu.vf23, vu.Q); sq_buffer(Mask::xyzw, vu.vf15, vu.vi03 + -2); - // nop | mul.xyz vf12, vf12, Q 971 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); - // nop | add.xyzw vf23, vf23, vf04 972 - vu.vf23.add(Mask::xyzw, vu.vf23, vu.vf04); - // nop | maxy.w vf23, vf23, vf01 973 - vu.vf23.max(Mask::w, vu.vf23, vu.vf01.y()); - // nop | miniz.w vf23, vf23, vf01 974 - vu.vf23.mini(Mask::w, vu.vf23, vu.vf01.z()); - // nop | ftoi4.xyzw vf23, vf23 975 - vu.vf23.ftoi4(Mask::xyzw, vu.vf23); - // sq.xyzw vf12, -3(vi03) | nop 976 - sq_buffer(Mask::xyzw, vu.vf12, vu.vi03 + -3); - // sq.xyzw vf23, -1(vi03) | nop 977 - sq_buffer(Mask::xyzw, vu.vf23, vu.vi03 + -1); - L74: - // BRANCH! - // iblez vi10, L75 | nop 978 - bc = ((s16)vu.vi10) <= 0; - // nop | nop 979 - - if (bc) { goto L75; } - - // ilw.x vi09, 948(vi10) | nop 980 - ilw_buffer(Mask::x, vu.vi09, vu.vi10 + 948); - // ilw.y vi07, 948(vi10) | nop 981 - ilw_buffer(Mask::y, vu.vi07, vu.vi10 + 948); - // BRANCH! 
- // b L69 | nop 982 - bc = true; - // iaddi vi10, vi10, -0x1 | nop 983 - vu.vi10 = vu.vi10 + -1; - if (bc) { goto L69; } - - L75: - // jr vi15 | nop 984 - ASSERT(false); - // nop | nop 985 - - L76: - // iaddiu vi09, vi00, 0x38b | nop 986 - vu.vi09 = 0x38b; /* 907 */ - L77: - // ilwr.x vi08, vi09 | nop 987 - ilw_buffer(Mask::x, vu.vi08, vu.vi09); - // ilwr.y vi07, vi09 | nop 988 - ilw_buffer(Mask::y, vu.vi07, vu.vi09); - // ilwr.z vi06, vi09 | nop 989 - ilw_buffer(Mask::z, vu.vi06, vu.vi09); - // nop | nop 990 - - // BRANCH! - // ibeq vi00, vi08, L79 | nop 991 - bc = (vu.vi08 == 0); - // lq.xyzw vf23, 0(vi07) | nop 992 - lq_buffer(Mask::xyzw, vu.vf23, vu.vi07); - if (bc) { goto L79; } - - // BRANCH! - // ibeq vi07, vi08, L79 | nop 993 - bc = (vu.vi07 == vu.vi08); - // lq.xyzw vf24, 0(vi08) | nop 994 - lq_buffer(Mask::xyzw, vu.vf24, vu.vi08); - if (bc) { goto L79; } - - // jalr vi11, vi06 | nop 995 - ASSERT(false); - // lq.xyzw vf15, 1(vi07) | nop 996 - lq_buffer(Mask::xyzw, vu.vf15, vu.vi07 + 1); - // lq.xyzw vf16, 1(vi08) | nop 997 - lq_buffer(Mask::xyzw, vu.vf16, vu.vi08 + 1); - // lq.xyzw vf12, 2(vi07) | nop 998 - lq_buffer(Mask::xyzw, vu.vf12, vu.vi07 + 2); - // fsand vi01, 0x2 | nop 999 - ASSERT(false); - // fsand vi02, 0x2 | subw.w vf31, vf30, vf31 1000 - vu.vf31.sub(Mask::w, vu.vf30, vu.vf31.w()); ASSERT(false); - // BRANCH! - // ibeq vi02, vi01, L79 | nop 1001 - bc = (vu.vi02 == vu.vi01); - // lq.xyzw vf13, 2(vi08) | nop 1002 - lq_buffer(Mask::xyzw, vu.vf13, vu.vi08 + 2); - if (bc) { goto L79; } - - // BRANCH! - // ibeq vi00, vi01, L78 | nop 1003 - bc = (vu.vi01 == 0); - // div Q, vf30.w, vf31.w | nop 1004 - vu.Q = vu.vf30.w() / vu.vf31.w(); - if (bc) { goto L78; } - - // BRANCH! 
- // bal vi11, L81 | nop 1005 - ASSERT(false); - // nop | nop 1006 - - if (bc) { goto L81; } - - // sq.xyzw vf25, 4(vi09) | nop 1007 - sq_buffer(Mask::xyzw, vu.vf25, vu.vi09 + 4); - // sq.xyzw vf17, 5(vi09) | nop 1008 - sq_buffer(Mask::xyzw, vu.vf17, vu.vi09 + 5); - // sq.xyzw vf14, 6(vi09) | nop 1009 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi09 + 6); - // iaddi vi07, vi09, 0x4 | nop 1010 - vu.vi07 = vu.vi09 + 4; - // ior vi12, vi09, vi00 | nop 1011 - vu.vi12 = vu.vi09; - // BRANCH! - // bal vi15, L68 | nop 1012 - ASSERT(false); - // iaddi vi09, vi09, 0x7 | nop 1013 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L68; } - - // BRANCH! - // b L79 | nop 1014 - bc = true; - // ior vi09, vi12, vi00 | nop 1015 - vu.vi09 = vu.vi12; - if (bc) { goto L79; } - - L78: - // BRANCH! - // bal vi11, L81 | nop 1016 - ASSERT(false); - // nop | nop 1017 - - if (bc) { goto L81; } - - // sq.xyzw vf25, 1(vi09) | nop 1018 - sq_buffer(Mask::xyzw, vu.vf25, vu.vi09 + 1); - // sq.xyzw vf17, 2(vi09) | nop 1019 - sq_buffer(Mask::xyzw, vu.vf17, vu.vi09 + 2); - // sq.xyzw vf14, 3(vi09) | nop 1020 - sq_buffer(Mask::xyzw, vu.vf14, vu.vi09 + 3); - // iaddi vi07, vi09, 0x1 | nop 1021 - vu.vi07 = vu.vi09 + 1; - // ior vi12, vi09, vi00 | nop 1022 - vu.vi12 = vu.vi09; - // BRANCH! - // bal vi15, L68 | nop 1023 - ASSERT(false); - // iaddi vi09, vi09, 0x7 | nop 1024 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L68; } - - // ior vi09, vi12, vi00 | nop 1025 - vu.vi09 = vu.vi12; - L79: - // isubiu vi01, vi09, 0x3ae | nop 1026 - vu.vi01 = vu.vi09 - 0x3ae; /* 942 */ - // iswr.x vi00, vi09 | nop 1027 - isw_buffer(Mask::x, vu.vi00, vu.vi09); - // iswr.y vi00, vi09 | nop 1028 - isw_buffer(Mask::y, vu.vi00, vu.vi09); - // BRANCH! - // ibltz vi01, L77 | nop 1029 - bc = ((s16)vu.vi01) < 0; - // iaddi vi09, vi09, 0x7 | nop 1030 - vu.vi09 = vu.vi09 + 7; - if (bc) { goto L77; } - - // BRANCH! 
- // ibeq vi00, vi05, L80 | nop 1031 - bc = (vu.vi05 == 0); - // mtir vi15, vf31.x | nop 1032 - vu.vi15 = vu.vf31.x_as_u16(); - if (bc) { goto L80; } - - // iaddiu vi05, vi05, 0x4000 | nop 1033 - vu.vi05 = vu.vi05 + 0x4000; /* 16384 */ - // iaddiu vi05, vi05, 0x4000 | nop 1034 - vu.vi05 = vu.vi05 + 0x4000; /* 16384 */ - // iswr.x vi05, vi04 | nop 1035 - isw_buffer(Mask::x, vu.vi05, vu.vi04); - L80: - // nop | nop 1036 - - // jr vi15 | nop 1037 - ASSERT(false); - // nop | nop 1038 - - // jr vi11 | addx.w vf30, vf23, vf23 1039 - vu.vf30.add(Mask::w, vu.vf23, vu.vf23.x()); ASSERT(false); - // nop | addx.w vf31, vf24, vf24 1040 - vu.vf31.add(Mask::w, vu.vf24, vu.vf24.x()); - // jr vi11 | subx.w vf30, vf23, vf23 1041 - vu.vf30.sub(Mask::w, vu.vf23, vu.vf23.x()); ASSERT(false); - // nop | subx.w vf31, vf24, vf24 1042 - vu.vf31.sub(Mask::w, vu.vf24, vu.vf24.x()); - // jr vi11 | addy.w vf30, vf23, vf23 1043 - vu.vf30.add(Mask::w, vu.vf23, vu.vf23.y()); ASSERT(false); - // nop | addy.w vf31, vf24, vf24 1044 - vu.vf31.add(Mask::w, vu.vf24, vu.vf24.y()); - // jr vi11 | suby.w vf30, vf23, vf23 1045 - vu.vf30.sub(Mask::w, vu.vf23, vu.vf23.y()); ASSERT(false); - // nop | suby.w vf31, vf24, vf24 1046 - vu.vf31.sub(Mask::w, vu.vf24, vu.vf24.y()); - // jr vi11 | addz.w vf30, vf23, vf23 1047 - vu.vf30.add(Mask::w, vu.vf23, vu.vf23.z()); ASSERT(false); - // nop | addz.w vf31, vf24, vf24 1048 - vu.vf31.add(Mask::w, vu.vf24, vu.vf24.z()); - // jr vi11 | subz.w vf30, vf23, vf23 1049 - vu.vf30.sub(Mask::w, vu.vf23, vu.vf23.z()); ASSERT(false); - // nop | subz.w vf31, vf24, vf24 1050 - vu.vf31.sub(Mask::w, vu.vf24, vu.vf24.z()); - L81: - // nop | sub.xyzw vf25, vf24, vf23 1051 - vu.vf25.sub(Mask::xyzw, vu.vf24, vu.vf23); - // nop | sub.xyzw vf17, vf16, vf15 1052 - vu.vf17.sub(Mask::xyzw, vu.vf16, vu.vf15); - // nop | sub.xyzw vf14, vf13, vf12 1053 - vu.vf14.sub(Mask::xyzw, vu.vf13, vu.vf12); - // waitq | mul.xyzw vf25, vf25, Q 1054 - vu.vf25.mul(Mask::xyzw, vu.vf25, vu.Q); ASSERT(false); 
- // nop | mul.xyzw vf17, vf17, Q 1055 - vu.vf17.mul(Mask::xyzw, vu.vf17, vu.Q); - // nop | mul.xyzw vf14, vf14, Q 1056 - vu.vf14.mul(Mask::xyzw, vu.vf14, vu.Q); - // nop | add.xyzw vf25, vf23, vf25 1057 - vu.vf25.add(Mask::xyzw, vu.vf23, vu.vf25); - // jr vi11 | add.xyzw vf17, vf15, vf17 1058 - vu.vf17.add(Mask::xyzw, vu.vf15, vu.vf17); ASSERT(false); - // nop | add.xyzw vf14, vf12, vf14 1059 - vu.vf14.add(Mask::xyzw, vu.vf12, vu.vf14); - L82: + L82: // R // iaddi vi14, vi13, 0x7 | nop 1060 vu.vi14 = vu.vi13 + 7; // lq.xyzw vf03, 4(vi13) | nop 1061 @@ -2793,7 +1097,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // iadd vi06, vi03, vi12 | nop 1072 vu.vi06 = vu.vi03 + vu.vi12; // sqi.xyzw vf02, vi06 | nop 1073 - sq_buffer(Mask::xyzw, vu.vf02, vu.vi06++); + sq_buffer(Mask::xyzw, gen.adgif_tmpl, vu.vi06++); // sqi.xyzw vf16, vi06 | nop 1074 sq_buffer(Mask::xyzw, vu.vf16, vu.vi06++); // sqi.xyzw vf17, vi06 | nop 1075 diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index 5146d35c12..8b687e6280 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -4,7 +4,7 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), m_direct(name, my_id, 0x30000), - m_direct2(30000, 60000, 1000, name) {} + m_direct2(30000, 60000, 1000, name, true) {} void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); diff --git a/game/graphics/opengl_renderer/GenericRenderer.h b/game/graphics/opengl_renderer/GenericRenderer.h index aad1695837..ec7ae0e8cc 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.h +++ b/game/graphics/opengl_renderer/GenericRenderer.h @@ -47,7 +47,7 @@ class GenericRenderer : public BucketRenderer { struct Vu { u32 row[4]; u32 stcycl = 0; - Vf vf01, vf02, vf03, vf04, vf05, vf06, vf07, vf08, vf09, vf10, vf11, 
vf12, vf13, vf14, vf15, + Vf vf03, vf10, vf11, vf12, vf13, vf14, vf15, vf16, vf17, vf18, vf19, vf20, vf21, vf22, vf23, vf24, vf25, vf26, vf27, vf28, vf29, vf30, vf31; const Vf vf00; @@ -59,6 +59,16 @@ class GenericRenderer : public BucketRenderer { Vu() : vf00(0, 0, 0, 1) {} } vu; + struct { + Vf fog; + Vf adgif_tmpl; + Vf hvdf_off; + Vf hmge_scale; + Vf guard; + + Vf mat0, mat1; + } gen; + struct alignas(16) BufferMemory { u8 data[1024 * 16]; } m_buffer; diff --git a/game/graphics/opengl_renderer/MercRenderer.cpp b/game/graphics/opengl_renderer/MercRenderer.cpp index a2731796c0..ce52d2895c 100644 --- a/game/graphics/opengl_renderer/MercRenderer.cpp +++ b/game/graphics/opengl_renderer/MercRenderer.cpp @@ -5,7 +5,7 @@ MercRenderer::MercRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), m_direct(fmt::format("{}-dir", name), my_id, 0x30000), - m_direct2(20000, 40000, 1000, name) { + m_direct2(20000, 40000, 1000, name, false) { memset(m_buffer.data, 0, sizeof(m_buffer.data)); } From 78da75f53e388d9b9c4d979e1cb02c09e40fb3e1 Mon Sep 17 00:00:00 2001 From: water Date: Fri, 4 Mar 2022 21:39:20 -0500 Subject: [PATCH 02/12] name the obvious ones --- .../opengl_renderer/GenericProgram.cpp | 370 +++++++++--------- .../opengl_renderer/GenericRenderer.h | 11 +- 2 files changed, 193 insertions(+), 188 deletions(-) diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index 347042fb02..8b9b0d24d6 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -200,32 +200,32 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 119 vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 120 - vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf10, vu.vi13 + 
2); + vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat2, vu.vi13 + 2); // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 121 // fmt::print("a: [{}] [{}]\n", vu.vf22.print(), vu.vf23.print()); - vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); + vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, gen.mat3, vu.vi13 + 3); // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 122 vu.vf23.ftoi12_check(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 123 - vu.vf16.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; + gen.vtx_load0.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; // iaddi vi11, vi00, -0x2 | sub.xyzw vf17, vf17, vf17 124 - vu.vf17.set_zero(); vu.vi11 = -2; + gen.vtx_load1.set_zero(); vu.vi11 = -2; // lq.xy vf22, 0(vi10) | nop 125 lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 126 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 127 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 128 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 129 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf17, 2(vi10) | nop 130 - lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); // iand vi06, vi02, vi11 | nop 131 vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 132 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); 
vu.vf22.mfir(Mask::x, vu.vi06); + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vf22.mfir(Mask::x, vu.vi06); // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 133 // fmt::print("b: [{}] [{}]\n", vu.vf24.print(), vu.vf25.print()); vu.vf24.ftoi12_check(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; @@ -234,178 +234,178 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | nop 135 // div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 136 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / vu.vf12.w(); + vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / gen.vtx_p0.w(); // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 137 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 138 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 139 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 140 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi07, vi03, vi11 | nop 141 vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 142 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf17.z()); vu.vf23.mfir(Mask::x, vu.vi07); + vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load1.z()); vu.vf23.mfir(Mask::x, vu.vi07); // nop | mul.xyz vf12, vf12, Q 143 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); + gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); // nop | mul.xyz vf18, vf18, Q 144 vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); // nop | nop 145 // div Q, vf01.x, vf13.w | 
itof12.xyz vf19, vf23 146 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / vu.vf13.w(); + vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / gen.vtx_p1.w(); // nop | add.xyzw vf12, vf12, vf04 147 - vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); + gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 148 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 149 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 150 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 151 - vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); // iand vi08, vi04, vi11 | nop 152 vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 153 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vf24.mfir(Mask::x, vu.vi08); + vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vf24.mfir(Mask::x, vu.vi08); // nop | mul.xyz vf13, vf13, Q 154 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); + gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); // nop | mul.xyz vf19, vf19, Q 155 vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); // iaddi vi14, vi14, 0x9 | maxy.w vf12, vf12, vf01 156 - vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); vu.vi14 = vu.vi14 + 9; - // fmt::print("vf12-1a: [{}]\n", vu.vf12.print()); + gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); vu.vi14 = vu.vi14 + 9; + // 
fmt::print("vf12-1a: [{}]\n", gen.vtx_p0.print()); L10: // R - // fmt::print("vf12-1b: [{}]\n", vu.vf12.print()); + // fmt::print("vf12-1b: [{}]\n", gen.vtx_p0.print()); // div Q, vf01.x, vf14.w | itof12.xyz vf20, vf24 157 - vu.vf20.itof12(Mask::xyz, vu.vf24); vu.Q = gen.fog.x() / vu.vf14.w(); + vu.vf20.itof12(Mask::xyz, vu.vf24); vu.Q = gen.fog.x() / gen.vtx_p2.w(); // BRANCH! // ibeq vi02, vi06, L11 | add.xyzw vf13, vf13, vf04 158 - vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); bc = (vu.vi02 == vu.vi06); + gen.vtx_p1.add(Mask::xyzw, gen.vtx_p1, gen.hvdf_off); bc = (vu.vi02 == vu.vi06); // mtir vi05, vf25.x | mulaw.xyzw ACC, vf11, vf00 159 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi05 = vu.vf25.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi05 = vu.vf25.x_as_u16(); if (bc) { goto L11; } // nop | addw.w vf12, vf12, vf01 160 - vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); + gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); L11: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 161 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | madday.xyzw ACC, vf09, vf17 162 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | miniz.w vf13, vf13, vf01 163 - vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); - // fmt::print("vf16 vertex [{}] @ \n", vu.vf16.print(), vu.vi10 + 2); + gen.vtx_p1.mini(Mask::w, gen.vtx_p1, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); + // fmt::print("vf16 vertex [{}] @ \n", gen.vtx_load0.print(), vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 164 - vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + 
gen.vtx_p0.ftoi4_check(Mask::xyzw, gen.vtx_p0); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | maddz.xyzw vf15, vf10, vf17 165 - vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf17.z()); vu.vf25.mfir(Mask::x, vu.vi09); + vu.acc.madd(Mask::xyzw, gen.vtx_p3, gen.mat2, gen.vtx_load1.z()); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | mul.xyz vf14, vf14, Q 166 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); + gen.vtx_p2.mul(Mask::xyz, gen.vtx_p2, vu.Q); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L15 | mul.xyz vf20, vf20, Q 167 vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); bc = (vu.vi14 == vu.vi10); - // fmt::print("store: {} {}\n", vu.vi10 - 10, vu.vf12.print_hex()); + // fmt::print("store: {} {}\n", vu.vi10 - 10, gen.vtx_p0.print_hex()); // sq.xyzw vf12, -10(vi10) | maxy.w vf13, vf13, vf01 168 - vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); + gen.vtx_p1.max(Mask::w, gen.vtx_p1, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf15.w | itof12.xyz vf21, vf25 169 - vu.vf21.itof12(Mask::xyz, vu.vf25); vu.Q = gen.fog.x() / vu.vf15.w(); + vu.vf21.itof12(Mask::xyz, vu.vf25); vu.Q = gen.fog.x() / gen.vtx_p3.w(); // BRANCH! 
// ibeq vi03, vi07, L12 | add.xyzw vf14, vf14, vf04 170 - vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); bc = (vu.vi03 == vu.vi07); + gen.vtx_p2.add(Mask::xyzw, gen.vtx_p2, gen.hvdf_off); bc = (vu.vi03 == vu.vi07); // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 171 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); if (bc) { goto L12; } // nop | addw.w vf13, vf13, vf01 172 - vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); + gen.vtx_p1.add(Mask::w, gen.vtx_p1, kFogFloatOffset); L12: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 173 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 174 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf14, vf14, vf01 175 - vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + gen.vtx_p2.mini(Mask::w, gen.vtx_p2, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 176 - vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + gen.vtx_p1.ftoi4_check(Mask::xyzw, gen.vtx_p1); vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 177 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vf22.mfir(Mask::x, vu.vi06); - // fmt::print("vf12 transformed: [{}]\n", vu.vf12.print()); + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vf22.mfir(Mask::x, vu.vi06); + // fmt::print("vf12 transformed: [{}]\n", gen.vtx_p0.print()); // sq.xyzw vf19, -12(vi10) | mul.xyz vf15, vf15, Q 178 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); 
sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); + gen.vtx_p3.mul(Mask::xyz, gen.vtx_p3, vu.Q); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L15 | mul.xyz vf21, vf21, Q 179 vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf13, -10(vi10) | maxy.w vf14, vf14, vf01 180 - vu.vf14.max(Mask::w, vu.vf14, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); + gen.vtx_p2.max(Mask::w, gen.vtx_p2, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p1, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 181 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / vu.vf12.w(); + vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / gen.vtx_p0.w(); // BRANCH! // ibeq vi04, vi08, L13 | add.xyzw vf15, vf15, vf04 182 - vu.vf15.add(Mask::xyzw, vu.vf15, gen.hvdf_off); bc = (vu.vi04 == vu.vi08); + gen.vtx_p3.add(Mask::xyzw, gen.vtx_p3, gen.hvdf_off); bc = (vu.vi04 == vu.vi08); // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 183 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); if (bc) { goto L13; } // nop | addw.w vf14, vf14, vf01 184 - vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); + gen.vtx_p2.add(Mask::w, gen.vtx_p2, kFogFloatOffset); L13: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 185 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf17.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 186 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf17.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | miniz.w vf15, vf15, vf01 187 - vu.vf15.mini(Mask::w, vu.vf15, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + gen.vtx_p3.mini(Mask::w, gen.vtx_p3, 
gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 188 - vu.vf14.ftoi4_check(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; + gen.vtx_p2.ftoi4_check(Mask::xyzw, gen.vtx_p2); vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 189 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf17.z()); vu.vf23.mfir(Mask::x, vu.vi07); + vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load1.z()); vu.vf23.mfir(Mask::x, vu.vi07); // sq.xyzw vf20, -12(vi10) | mul.xyz vf12, vf12, Q 190 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); + gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L15 | mul.xyz vf18, vf18, Q 191 vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf14, -10(vi10) | maxy.w vf15, vf15, vf01 192 - vu.vf15.max(Mask::w, vu.vf15, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf14, vu.vi10 + -10); + gen.vtx_p3.max(Mask::w, gen.vtx_p3, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p2, vu.vi10 + -10); if (bc) { goto L15; } // div Q, vf01.x, vf13.w | itof12.xyz vf19, vf23 193 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / vu.vf13.w(); + vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / gen.vtx_p1.w(); // BRANCH! 
// ibeq vi05, vi09, L14 | add.xyzw vf12, vf12, vf04 194 - vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); bc = (vu.vi05 == vu.vi09); + gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); bc = (vu.vi05 == vu.vi09); // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 195 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); if (bc) { goto L14; } // nop | addw.w vf15, vf15, vf01 196 - vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); + gen.vtx_p3.add(Mask::w, gen.vtx_p3, kFogFloatOffset); L14: // R // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 197 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 198 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 199 - vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xyz, vu.vf17, vu.vi10 + 2); + gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); // iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 200 - vu.vf15.ftoi4_check(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; + gen.vtx_p3.ftoi4_check(Mask::xyzw, gen.vtx_p3); vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 201 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vf24.mfir(Mask::x, vu.vi08); + vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vf24.mfir(Mask::x, vu.vi08); // sq.xyzw vf21, -12(vi10) | mul.xyz vf13, vf13, Q 202 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); + gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); // 
BRANCH! // ibne vi14, vi10, L10 | mul.xyz vf19, vf19, Q 203 vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); bc = (vu.vi14 != vu.vi10); // sq.xyzw vf15, -10(vi10) | maxy.w vf12, vf12, vf01 204 // fmt::print("reloop {} {}\n", vu.vi14, vu.vi10); - vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -10); + gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p3, vu.vi10 + -10); if (bc) { goto L10; } L15: // R @@ -444,13 +444,13 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 370 vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 371 - vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, vu.vf10, vu.vi13 + 2); + vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat2, vu.vi13 + 2); // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 372 - vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, vu.vf11, vu.vi13 + 3); + vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, gen.mat3, vu.vi13 + 3); // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 373 vu.vf23.ftoi12_check(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 374 - vu.vf16.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; + gen.vtx_load0.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; // iaddi vi11, vi00, -0x2 | nop 375 vu.vi11 = -2; // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 376 @@ -462,21 +462,21 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // lq.xy vf22, 0(vi10) | nop 379 lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 380 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // nop | nop 381 // nop | nop 382 // nop | mulaw.xyzw ACC, vf11, vf00 383 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); + 
vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); // mtir vi02, vf22.x | maddax.xyzw ACC, vf08, vf16 384 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 385 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | maddz.xyzw vf12, vf10, vf16 386 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 387 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi06, vi02, vi11 | nop 388 vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | nop 389 @@ -490,23 +490,23 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | itof12.xyz vf18, vf22 393 vu.vf18.itof12(Mask::xyz, vu.vf22); // div Q, vf01.x, vf12.w | mul.xyzw vf26, vf12, vf05 394 - vu.vf26.mul(Mask::xyzw, vu.vf12, gen.hmge_scale); vu.Q = gen.fog.x() / vu.vf12.w(); + vu.vf26.mul(Mask::xyzw, gen.vtx_p0, gen.hmge_scale); vu.Q = gen.fog.x() / gen.vtx_p0.w(); // nop | nop 395 // nop | mulaw.xyzw ACC, vf11, vf00 396 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); // mtir vi03, vf23.x | maddax.xyzw ACC, vf08, vf16 397 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi03 = vu.vf23.x_as_u16(); // iaddi vi10, vi10, 0x3 | madday.xyzw ACC, vf09, vf16 398 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi10 = vu.vi10 + 3; + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | 
maddz.xyzw vf13, vf10, vf16 399 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load0.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | nop 400 - lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi07, vi03, vi11 | nop 401 vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q 402 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); + gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); // fcset 0x0 | nop 403 cf = 0; // nop | nop 404 @@ -516,25 +516,25 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // nop | itof12.xyz vf19, vf23 406 vu.vf19.itof12(Mask::xyz, vu.vf23); // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 407 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf13.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p1.w(); // nop | add.xyzw vf12, vf12, vf04 408 - vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); + gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 409 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 410 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi04 = vu.vf24.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 411 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 412 - vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + gen.vtx_p0.mini(Mask::w, 
gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 413 - vu.vf27.mul(Mask::xyzw, vu.vf13, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf27.mul(Mask::xyzw, gen.vtx_p1, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi08, vi04, vi11 | nop 414 vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 415 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); + gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); // nop | maxy.w vf12, vf12, vf01 416 - vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); + gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); // nop | clipw.xyz vf26, vf26 417 cf = clip(vu.vf26, vu.vf26.w(), cf); // nop | mul.xyz vf19, vf19, Q 418 @@ -543,70 +543,70 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi02, vi06, L35 | itof12.xyz vf20, vf24 419 vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 420 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf14.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p2.w(); if (bc) { goto L35; } // nop | addw.w vf12, vf12, vf01 421 - vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); + gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); L35: // nop | add.xyzw vf13, vf13, vf04 422 - vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); + gen.vtx_p1.add(Mask::xyzw, gen.vtx_p1, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 423 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 424 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi05 = vu.vf25.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw 
vf15, vf10, vf16 425 - vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p3, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 426 - vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + gen.vtx_p1.mini(Mask::w, gen.vtx_p1, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 427 - vu.vf28.mul(Mask::xyzw, vu.vf14, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf28.mul(Mask::xyzw, gen.vtx_p2, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 428 - vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + gen.vtx_p0.ftoi4_check(Mask::xyzw, gen.vtx_p0); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 429 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); + gen.vtx_p2.mul(Mask::xyz, gen.vtx_p2, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 430 - vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); + gen.vtx_p1.max(Mask::w, gen.vtx_p1, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 431 cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q 432 - vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); + vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + -10); if (bc) { goto L46; } // BRANCH! 
// ibeq vi03, vi07, L36 | itof12.xyz vf21, vf25 433 vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 434 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf15.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p3.w(); if (bc) { goto L36; } // nop | addw.w vf13, vf13, vf01 435 - vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); + gen.vtx_p1.add(Mask::w, gen.vtx_p1, kFogFloatOffset); L36: // nop | add.xyzw vf14, vf14, vf04 436 - vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); + gen.vtx_p2.add(Mask::xyzw, gen.vtx_p2, gen.hvdf_off); // nop | maddax.xyzw ACC, vf08, vf16 437 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 438 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi02 = vu.vf22.x_as_u16(); // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 439 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 440 - vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + gen.vtx_p2.mini(Mask::w, gen.vtx_p2, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 441 - vu.vf29.mul(Mask::xyzw, vu.vf15, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf29.mul(Mask::xyzw, gen.vtx_p3, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 442 - vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + gen.vtx_p1.ftoi4_check(Mask::xyzw, gen.vtx_p1); vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 
443 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); + gen.vtx_p3.mul(Mask::xyz, gen.vtx_p3, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 444 - vu.vf14.max(Mask::w, vu.vf14, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); + gen.vtx_p2.max(Mask::w, gen.vtx_p2, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); // BRANCH! // ibeq vi14, vi10, L46 | clipw.xyz vf28, vf28 445 cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q 446 - vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); + vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p1, vu.vi10 + -10); if (bc) { goto L46; } L37: @@ -614,160 +614,160 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibeq vi04, vi08, L38 | itof12.xyz vf18, vf22 447 vu.vf18.itof12(Mask::xyz, vu.vf22); bc = (vu.vi04 == vu.vi08); // div Q, vf01.x, vf12.w | mulaw.xyzw ACC, vf11, vf00 448 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf12.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p0.w(); if (bc) { goto L38; } // nop | addw.w vf14, vf14, vf01 449 - vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); + gen.vtx_p2.add(Mask::w, gen.vtx_p2, kFogFloatOffset); L38: // fcand vi01, 0x3ffff | add.xyzw vf15, vf15, vf04 450 - vu.vf15.add(Mask::xyzw, vu.vf15, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; + gen.vtx_p3.add(Mask::xyzw, gen.vtx_p3, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L55 | maddax.xyzw ACC, vf08, vf16 451 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); bc = (vu.vi01 != 0); // mtir vi03, vf23.x | madday.xyzw ACC, vf09, vf16 452 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi03 = vu.vf23.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi03 = vu.vf23.x_as_u16(); if (bc) { goto L55; } L39: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf13, vf10, vf16 453 - vu.acc.madd(Mask::xyzw, vu.vf13, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf24, 0(vi10) | miniz.w vf15, vf15, vf01 454 - vu.vf15.mini(Mask::w, vu.vf15, gen.fog.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); + gen.vtx_p3.mini(Mask::w, gen.vtx_p3, gen.fog.z()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf26, vf12, vf05 455 - vu.vf26.mul(Mask::xyzw, vu.vf12, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf26.mul(Mask::xyzw, gen.vtx_p0, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 456 - vu.vf14.ftoi4_check(Mask::xyzw, vu.vf14); vu.vi07 = vu.vi03 & vu.vi11; + gen.vtx_p2.ftoi4_check(Mask::xyzw, gen.vtx_p2); vu.vi07 = vu.vi03 & vu.vi11; // mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q 457 - vu.vf12.mul(Mask::xyz, vu.vf12, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); + gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); vu.vf23.mfir(Mask::x, vu.vi07); // sq.xyzw vf20, -12(vi10) | maxy.w vf15, vf15, vf01 458 - vu.vf15.max(Mask::w, vu.vf15, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); + gen.vtx_p3.max(Mask::w, gen.vtx_p3, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf29, vf29 459 cf = clip(vu.vf29, vu.vf29.w(), cf); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf14, -10(vi10) | mul.xyz vf18, vf18, Q 460 - vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); sq_buffer(Mask::xyzw, vu.vf14, vu.vi10 + -10); + vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p2, vu.vi10 + -10); if (bc) { goto L46; } // BRANCH! // ibeq vi05, vi09, L40 | itof12.xyz vf19, vf23 461 vu.vf19.itof12(Mask::xyz, vu.vf23); bc = (vu.vi05 == vu.vi09); // div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 462 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf13.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p1.w(); if (bc) { goto L40; } // nop | addw.w vf15, vf15, vf01 463 - vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); + gen.vtx_p3.add(Mask::w, gen.vtx_p3, kFogFloatOffset); L40: // fcand vi01, 0x3ffff | add.xyzw vf12, vf12, vf04 464 - vu.vf12.add(Mask::xyzw, vu.vf12, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; + gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L59 | maddax.xyzw ACC, vf08, vf16 465 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); bc = (vu.vi01 != 0); // mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 466 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi04 = vu.vf24.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi04 = vu.vf24.x_as_u16(); if (bc) { goto L59; } L41: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 467 - vu.acc.madd(Mask::xyzw, vu.vf14, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 468 - vu.vf12.mini(Mask::w, vu.vf12, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); + gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 469 - vu.vf27.mul(Mask::xyzw, vu.vf13, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf27.mul(Mask::xyzw, gen.vtx_p1, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 470 - vu.vf15.ftoi4_check(Mask::xyzw, vu.vf15); vu.vi08 = vu.vi04 & vu.vi11; + gen.vtx_p3.ftoi4_check(Mask::xyzw, gen.vtx_p3); vu.vi08 = vu.vi04 & vu.vi11; // mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q 471 - vu.vf13.mul(Mask::xyz, vu.vf13, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); + gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); vu.vf24.mfir(Mask::x, vu.vi08); // sq.xyzw vf21, -12(vi10) | maxy.w vf12, vf12, vf01 472 - vu.vf12.max(Mask::w, vu.vf12, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); + gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf26, vf26 473 cf = clip(vu.vf26, vu.vf26.w(), cf); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf15, -10(vi10) | mul.xyz vf19, vf19, Q 474 - vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); sq_buffer(Mask::xyzw, vu.vf15, vu.vi10 + -10); + vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p3, vu.vi10 + -10); if (bc) { goto L46; } // BRANCH! // ibeq vi02, vi06, L42 | itof12.xyz vf20, vf24 475 vu.vf20.itof12(Mask::xyz, vu.vf24); bc = (vu.vi02 == vu.vi06); // div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 476 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf14.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p2.w(); if (bc) { goto L42; } // nop | addw.w vf12, vf12, vf01 477 - vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); + gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); L42: // fcand vi01, 0x3ffff | add.xyzw vf13, vf13, vf04 478 - vu.vf13.add(Mask::xyzw, vu.vf13, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; + gen.vtx_p1.add(Mask::xyzw, gen.vtx_p1, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L47 | maddax.xyzw ACC, vf08, vf16 479 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); bc = (vu.vi01 != 0); // mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 480 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi05 = vu.vf25.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi05 = vu.vf25.x_as_u16(); if (bc) { goto L47; } L43: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 481 - vu.acc.madd(Mask::xyzw, vu.vf15, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p3, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 482 - vu.vf13.mini(Mask::w, vu.vf13, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); + gen.vtx_p1.mini(Mask::w, gen.vtx_p1, gen.fog.z()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 483 - vu.vf28.mul(Mask::xyzw, vu.vf14, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf28.mul(Mask::xyzw, gen.vtx_p2, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 484 - vu.vf12.ftoi4_check(Mask::xyzw, vu.vf12); vu.vi09 = vu.vi05 & vu.vi11; + gen.vtx_p0.ftoi4_check(Mask::xyzw, gen.vtx_p0); vu.vi09 = vu.vi05 & vu.vi11; // mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q 485 - vu.vf14.mul(Mask::xyz, vu.vf14, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); + gen.vtx_p2.mul(Mask::xyz, gen.vtx_p2, vu.Q); vu.vf25.mfir(Mask::x, vu.vi09); // sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 486 - vu.vf13.max(Mask::w, vu.vf13, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); + gen.vtx_p1.max(Mask::w, gen.vtx_p1, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); // BRANCH! 
// ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 487 cf = clip(vu.vf27, vu.vf27.w(), cf); bc = (vu.vi14 == vu.vi10); // sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q 488 - vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, vu.vf12, vu.vi10 + -10); + vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + -10); if (bc) { goto L46; } // BRANCH! // ibeq vi03, vi07, L44 | itof12.xyz vf21, vf25 489 vu.vf21.itof12(Mask::xyz, vu.vf25); bc = (vu.vi03 == vu.vi07); // div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 490 - vu.acc.mula(Mask::xyzw, vu.vf11, vu.vf00.w()); vu.Q = gen.fog.x() / vu.vf15.w(); + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.Q = gen.fog.x() / gen.vtx_p3.w(); if (bc) { goto L44; } // nop | addw.w vf13, vf13, vf01 491 - vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); + gen.vtx_p1.add(Mask::w, gen.vtx_p1, kFogFloatOffset); L44: // fcand vi01, 0x3ffff | add.xyzw vf14, vf14, vf04 492 - vu.vf14.add(Mask::xyzw, vu.vf14, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; + gen.vtx_p2.add(Mask::xyzw, gen.vtx_p2, gen.hvdf_off); vu.vi01 = cf & 0x3ffff; // BRANCH! 
// ibne vi00, vi01, L51 | maddax.xyzw ACC, vf08, vf16 493 - vu.acc.madda(Mask::xyzw, gen.mat0, vu.vf16.x()); bc = (vu.vi01 != 0); + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); bc = (vu.vi01 != 0); // mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 494 - vu.acc.madda(Mask::xyzw, gen.mat1, vu.vf16.y()); vu.vi02 = vu.vf22.x_as_u16(); + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); vu.vi02 = vu.vf22.x_as_u16(); if (bc) { goto L51; } L45: // iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 495 - vu.acc.madd(Mask::xyzw, vu.vf12, vu.vf10, vu.vf16.z()); vu.vi10 = vu.vi10 + 3; + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vi10 = vu.vi10 + 3; // lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 496 - vu.vf14.mini(Mask::w, vu.vf14, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); + gen.vtx_p2.mini(Mask::w, gen.vtx_p2, gen.fog.z()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); // lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 497 - vu.vf29.mul(Mask::xyzw, vu.vf15, gen.hmge_scale); lq_buffer(Mask::xyz, vu.vf16, vu.vi10 + 2); + vu.vf29.mul(Mask::xyzw, gen.vtx_p3, gen.hmge_scale); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 498 - vu.vf13.ftoi4_check(Mask::xyzw, vu.vf13); vu.vi06 = vu.vi02 & vu.vi11; + gen.vtx_p1.ftoi4_check(Mask::xyzw, gen.vtx_p1); vu.vi06 = vu.vi02 & vu.vi11; // mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q 499 - vu.vf15.mul(Mask::xyz, vu.vf15, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); + gen.vtx_p3.mul(Mask::xyz, gen.vtx_p3, vu.Q); vu.vf22.mfir(Mask::x, vu.vi06); // sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 500 - vu.vf14.max(Mask::w, vu.vf14, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); + gen.vtx_p2.max(Mask::w, gen.vtx_p2, gen.fog.y()); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); // BRANCH! 
// ibne vi14, vi10, L37 | clipw.xyz vf28, vf28 501 cf = clip(vu.vf28, vu.vf28.w(), cf); bc = (vu.vi14 != vu.vi10); // sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q 502 - vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, vu.vf13, vu.vi10 + -10); + vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); sq_buffer(Mask::xyzw, gen.vtx_p1, vu.vi10 + -10); if (bc) { goto L37; } L46: @@ -783,7 +783,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibne vi02, vi06, L43 | nop 505 bc = (vu.vi02 != vu.vi06); // sq.xyzw vf23, 998(vi00) | addw.w vf12, vf12, vf01 506 - vu.vf12.add(Mask::w, vu.vf12, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); + gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L43; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf28, vf07 507 @@ -828,7 +828,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L48: // div Q, vf01.x, vf14.w | nop 524 - vu.Q = gen.fog.x() / vu.vf14.w(); + vu.Q = gen.fog.x() / gen.vtx_p2.w(); // lq.xyzw vf23, 998(vi00) | nop 525 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 526 @@ -856,7 +856,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibne vi03, vi07, L45 | nop 588 bc = (vu.vi03 != vu.vi07); // sq.xyzw vf23, 998(vi00) | addw.w vf13, vf13, vf01 589 - vu.vf13.add(Mask::w, vu.vf13, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); + gen.vtx_p1.add(Mask::w, gen.vtx_p1, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L45; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf29, vf07 590 @@ -900,7 +900,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L52: // div Q, vf01.x, vf15.w | nop 607 - vu.Q = gen.fog.x() / vu.vf15.w(); + vu.Q = gen.fog.x() / gen.vtx_p3.w(); // lq.xyzw vf23, 998(vi00) | nop 608 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 609 @@ -928,7 
+928,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibne vi04, vi08, L39 | nop 671 bc = (vu.vi04 != vu.vi08); // sq.xyzw vf23, 998(vi00) | addw.w vf14, vf14, vf01 672 - vu.vf14.add(Mask::w, vu.vf14, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); + gen.vtx_p2.add(Mask::w, gen.vtx_p2, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L39; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf26, vf07 673 @@ -972,7 +972,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L56: // div Q, vf01.x, vf12.w | nop 690 - vu.Q = gen.fog.x() / vu.vf12.w(); + vu.Q = gen.fog.x() / gen.vtx_p0.w(); // lq.xyzw vf23, 998(vi00) | nop 691 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 692 @@ -1000,7 +1000,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ibne vi05, vi09, L41 | nop 754 bc = (vu.vi05 != vu.vi09); // sq.xyzw vf23, 998(vi00) | addw.w vf15, vf15, vf01 755 - vu.vf15.add(Mask::w, vu.vf15, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); + gen.vtx_p3.add(Mask::w, gen.vtx_p3, kFogFloatOffset); sq_buffer(Mask::xyzw, vu.vf23, 998); if (bc) { goto L41; } // sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf27, vf07 756 @@ -1044,7 +1044,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S L60: // div Q, vf01.x, vf13.w | nop 773 - vu.Q = gen.fog.x() / vu.vf13.w(); + vu.Q = gen.fog.x() / gen.vtx_p1.w(); // lq.xyzw vf23, 998(vi00) | nop 774 lq_buffer(Mask::xyzw, vu.vf23, 998); // lq.xyzw vf24, 999(vi00) | nop 775 @@ -1085,9 +1085,11 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // ilw.w vi04, 1(vi14) | nop 1066 ilw_buffer(Mask::w, vu.vi04, vu.vi14 + 1); // lqi.xyzw vf16, vi14 | nop 1067 - lq_buffer(Mask::xyzw, vu.vf16, vu.vi14++); + Vf adgif_temp0; + lq_buffer(Mask::xyzw, adgif_temp0, vu.vi14++); // lqi.xyzw vf17, vi14 | nop 1068 - lq_buffer(Mask::xyzw, 
vu.vf17, vu.vi14++); + Vf adgif_temp1; + lq_buffer(Mask::xyzw, adgif_temp1, vu.vi14++); // lqi.xyzw vf18, vi14 | nop 1069 lq_buffer(Mask::xyzw, vu.vf18, vu.vi14++); // lqi.xyzw vf19, vi14 | nop 1070 @@ -1099,9 +1101,9 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S // sqi.xyzw vf02, vi06 | nop 1073 sq_buffer(Mask::xyzw, gen.adgif_tmpl, vu.vi06++); // sqi.xyzw vf16, vi06 | nop 1074 - sq_buffer(Mask::xyzw, vu.vf16, vu.vi06++); + sq_buffer(Mask::xyzw, adgif_temp0, vu.vi06++); // sqi.xyzw vf17, vi06 | nop 1075 - sq_buffer(Mask::xyzw, vu.vf17, vu.vi06++); + sq_buffer(Mask::xyzw, adgif_temp1, vu.vi06++); // sqi.xyzw vf18, vi06 | nop 1076 sq_buffer(Mask::xyzw, vu.vf18, vu.vi06++); // sqi.xyzw vf19, vi06 | nop 1077 diff --git a/game/graphics/opengl_renderer/GenericRenderer.h b/game/graphics/opengl_renderer/GenericRenderer.h index ec7ae0e8cc..f1ddd8475a 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.h +++ b/game/graphics/opengl_renderer/GenericRenderer.h @@ -47,9 +47,8 @@ class GenericRenderer : public BucketRenderer { struct Vu { u32 row[4]; u32 stcycl = 0; - Vf vf03, vf10, vf11, vf12, vf13, vf14, vf15, - vf16, vf17, vf18, vf19, vf20, vf21, vf22, vf23, vf24, vf25, vf26, vf27, vf28, vf29, vf30, - vf31; + Vf vf03, vf18, vf19, vf20, vf21, vf22, vf23, vf24, vf25, vf26, vf27, vf28, vf29, + vf30, vf31; const Vf vf00; u16 vi01, vi02, vi03, vi04, vi05, vi06, vi07, vi09, vi08, vi11, vi12, vi13, vi10, vi14, vi15; float I, P, Q; @@ -66,7 +65,11 @@ class GenericRenderer : public BucketRenderer { Vf hmge_scale; Vf guard; - Vf mat0, mat1; + Vf mat0, mat1, mat2, mat3; + + Vf vtx_p0, vtx_p1, vtx_p2, vtx_p3; + + Vf vtx_load0, vtx_load1; } gen; struct alignas(16) BufferMemory { From 543194ca9b6bd89ad94c0096adf8fb54e3a100c8 Mon Sep 17 00:00:00 2001 From: water Date: Fri, 4 Mar 2022 23:07:12 -0500 Subject: [PATCH 03/12] un-pipelined the fast case in generic vu1 --- .../opengl_renderer/GenericProgram.cpp | 427 ++++++++---------- 
.../opengl_renderer/GenericRenderer.h | 1 + goal_src/engine/anim/bones.gc | 1 + 3 files changed, 184 insertions(+), 245 deletions(-) diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index 8b9b0d24d6..bb1d31daec 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -156,6 +156,186 @@ void GenericRenderer::ilw_buffer(Mask mask, u16& dest, u16 addr) { memcpy(&dest, m_buffer.data + addr * 16 + offset, 2); } +void GenericRenderer::mscal_noclip_nopipe(SharedRenderState *render_state, ScopedProfilerNode &prof) { + // buffer crap + vu.vi02 = vu.vi13 - 0x363; + vu.vi13 = vu.vi13 + 0x1e; + if (vu.vi02 == 0) { + vu.vi13 = 0x345; /* 837 */ + } + vu.vi03 = vu.vi13 + 7; + ilw_buffer(Mask::w, vu.vi01, vu.vi13 + 5); + isw_buffer(Mask::x, vu.vi03, 906); + vu.vi10 = vu.vi12 + 9; + + lq_buffer(Mask::xyzw, gen.mat0, vu.vi13); + lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); + lq_buffer(Mask::xyzw, gen.mat2, vu.vi13 + 2); + lq_buffer(Mask::xyzw, gen.mat3, vu.vi13 + 3); + vu.vi02 = vu.vi01 + vu.vi01; + vu.vi01 = vu.vi01 + vu.vi02; + vu.vi11 = -2; + vu.vi14 = vu.vi10 + vu.vi01; + isw_buffer(Mask::w, vu.vi12, 906); + + vu.vf18.sub(Mask::w, vu.vf00, vu.vf00.w()); + + vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); + vu.vf22.ftoi12_check(Mask::z, vu.vf22); + + // this is the vertex transformation loop, unpipelined. + while (vu.vi10 != vu.vi14) { + // lq.xy vf22, 0(vi10) texture load? 
+ lq_buffer(Mask::xy, vu.vf22, vu.vi10); + // lq.xyz vf16, 2(vi10) vertex load + lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); + // mtir vi02, vf22.x grab s coordinate of texture + vu.vi02 = vu.vf22.x_as_u16(); + + // mulaw.xyzw ACC, vf11, vf00 matrix multiply W + vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); + + // maddax.xyzw ACC, vf08, vf16 matrix multiply X + vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); + + // madday.xyzw ACC, vf09, vf16 matrix multiply Y + vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); + + // iand vi06, vi02, vi11 mask s tex coord + vu.vi06 = vu.vi02 & vu.vi11; + + // mfir.x vf22, vi06 replace s coord + vu.vf22.mfir(Mask::x, vu.vi06); + + // maddz.xyzw vf12, vf10, vf16 matrix multiply Z + vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); + + // div Q, vf01.x, vf12.w perspective divide + vu.Q = gen.fog.x() / gen.vtx_p0.w(); + + // itof12.xyz vf18, vf22 texture int to float + vu.vf18.itof12(Mask::xyz, vu.vf22); + + // mul.xyz vf12, vf12, Q persepective divide + gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); + + // mul.xyz vf18, vf18, Q texture perspective divide + vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); + + // add.xyzw vf12, vf12, vf04 apply hvdf + gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); + + // miniz.w vf12, vf12, vf01 fog clamp + gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); + + // maxy.w vf12, vf12, vf01 fog clamp 2 + gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); + + // addw.w vf12, vf12, vf01 ONLY if vi02 != vi06 fog offset. + if (vu.vi02 != vu.vi06) { + gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); + } + // ftoi4.xyzw vf12, vf12 to ints for GS + gen.vtx_p0.ftoi4_check(Mask::xyzw, gen.vtx_p0); + + // store! 
+ sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + 2); + sq_buffer(Mask::xyzw, vu.vf18, vu.vi10); + + // iaddi vi10, vi10, 0x3 inc vertex pointer + vu.vi10 = vu.vi10 + 3; + } + + + // this loop places giftag templates and adgif shaders + // it allows the same vertices to be drawn several times with different shaders. + bool bc; + // iaddi vi14, vi13, 0x7 | nop 1060 + vu.vi14 = vu.vi13 + 7; + // lq.xyzw vf03, 4(vi13) | nop 1061 + Vf draw_hdr2; + lq_buffer(Mask::xyzw, draw_hdr2, vu.vi13 + 4); + // ilw.w vi02, 6(vi13) | nop 1062 + ilw_buffer(Mask::w, vu.vi02, vu.vi13 + 6); + // lq.xyzw vf21, 5(vi13) | nop 1063 + Vf draw_hdr0; + lq_buffer(Mask::xyzw, draw_hdr0, vu.vi13 + 5); + // lq.xyzw vf22, 6(vi13) | nop 1064 + Vf draw_hdr1; + lq_buffer(Mask::xyzw, draw_hdr1, vu.vi13 + 6); + L83: + // ilwr.w vi03, vi14 | nop 1065 + ilw_buffer(Mask::w, vu.vi03, vu.vi14); + // ilw.w vi04, 1(vi14) | nop 1066 + ilw_buffer(Mask::w, vu.vi04, vu.vi14 + 1); + // lqi.xyzw vf16, vi14 | nop 1067 + Vf adgif_temp0; + lq_buffer(Mask::xyzw, adgif_temp0, vu.vi14++); + // lqi.xyzw vf17, vi14 | nop 1068 + Vf adgif_temp1; + lq_buffer(Mask::xyzw, adgif_temp1, vu.vi14++); + // lqi.xyzw vf18, vi14 | nop 1069 + Vf adgif_temp2; + lq_buffer(Mask::xyzw, adgif_temp2, vu.vi14++); + // lqi.xyzw vf19, vi14 | nop 1070 + Vf adgif_temp3; + lq_buffer(Mask::xyzw, adgif_temp3, vu.vi14++); + // lqi.xyzw vf20, vi14 | nop 1071 + Vf adgif_temp4; + lq_buffer(Mask::xyzw, adgif_temp4, vu.vi14++); + // iadd vi06, vi03, vi12 | nop 1072 + vu.vi06 = vu.vi03 + vu.vi12; + // sqi.xyzw vf02, vi06 | nop 1073 + sq_buffer(Mask::xyzw, gen.adgif_tmpl, vu.vi06++); + // sqi.xyzw vf16, vi06 | nop 1074 + sq_buffer(Mask::xyzw, adgif_temp0, vu.vi06++); + // sqi.xyzw vf17, vi06 | nop 1075 + sq_buffer(Mask::xyzw, adgif_temp1, vu.vi06++); + // sqi.xyzw vf18, vi06 | nop 1076 + sq_buffer(Mask::xyzw, adgif_temp2, vu.vi06++); + // sqi.xyzw vf19, vi06 | nop 1077 + sq_buffer(Mask::xyzw, adgif_temp3, vu.vi06++); + // sqi.xyzw vf20, vi06 | nop 1078 + 
sq_buffer(Mask::xyzw, adgif_temp4, vu.vi06++); + // sqi.xyzw vf21, vi06 | nop 1079 + sq_buffer(Mask::xyzw, draw_hdr0, vu.vi06++); + // sqi.xyzw vf22, vi06 | nop 1080 + sq_buffer(Mask::xyzw, draw_hdr1, vu.vi06++); + // sqi.xyzw vf03, vi06 | nop 1081 + sq_buffer(Mask::xyzw, draw_hdr2, vu.vi06++); + // BRANCH! + // ibgez vi04, L83 | nop 1082 + bc = ((s16)vu.vi04) >= 0; + // isw.x vi04, -1(vi06) | nop 1083 + isw_buffer(Mask::x, vu.vi04, vu.vi06 + -1); + if (bc) { goto L83; } + + // iadd vi02, vi12, vi02 | nop 1084 + vu.vi02 = vu.vi12 + vu.vi02; + // nop | nop 1085 + + // xgkick vi02 | nop 1086 + xgkick(vu.vi02, render_state, prof); + // isubiu vi01, vi12, 0x22e | nop 1087 + vu.vi01 = vu.vi12 - 0x22e; /* 558 */ + // nop | nop 1088 + + // BRANCH! + // ibltz vi01, L84 | nop 1089 + bc = ((s16)vu.vi01) < 0; + // iaddiu vi12, vi12, 0x117 | nop 1090 + vu.vi12 = vu.vi12 + 0x117; /* 279 */ + if (bc) { goto L84; } + + // iaddi vi12, vi00, 0x0 | nop 1091 + vu.vi12 = 0; + L84: + // nop | nop :e 1092 + + // nop | nop 1093 + + return; +} void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, ScopedProfilerNode& prof) { @@ -166,256 +346,13 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S case 6: goto L33; case 8: - goto L8; + mscal_noclip_nopipe(render_state, prof); + return; default: fmt::print("Generic dispatch mscal: {}\n", imm); ASSERT(false); } - L8: // R - // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 109 - vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ - // iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 110 - vu.vf23.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi13 = vu.vi13 + 0x1e; /* 30 */ - // BRANCH! 
- // ibne vi00, vi02, L9 | addw.z vf24, vf00, vf00 111 - vu.vf24.add(Mask::z, vu.vf00, vu.vf00.w()); bc = (vu.vi02 != 0); - // nop | addw.z vf25, vf00, vf00 112 - vu.vf25.add(Mask::z, vu.vf00, vu.vf00.w()); - if (bc) { goto L9; } - - // iaddiu vi13, vi00, 0x345 | nop 113 - vu.vi13 = 0x345; /* 837 */ - L9: // R - // iaddi vi03, vi13, 0x7 | nop 114 - vu.vi03 = vu.vi13 + 7; - // ilw.w vi01, 5(vi13) | nop 115 - ilw_buffer(Mask::w, vu.vi01, vu.vi13 + 5); - // isw.x vi03, 906(vi00) | nop 116 - isw_buffer(Mask::x, vu.vi03, 906); - // iaddi vi10, vi12, 0x9 | subw.w vf18, vf00, vf00 117 - vu.vf18.sub(Mask::w, vu.vf00, vu.vf00.w()); vu.vi10 = vu.vi12 + 9; - // lq.xyzw vf08, 0(vi13) | subw.w vf19, vf00, vf00 118 - vu.vf19.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat0, vu.vi13); - // lq.xyzw vf09, 1(vi13) | subw.w vf20, vf00, vf00 119 - vu.vf20.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat1, vu.vi13 + 1); - // lq.xyzw vf10, 2(vi13) | subw.w vf21, vf00, vf00 120 - vu.vf21.sub(Mask::w, vu.vf00, vu.vf00.w()); lq_buffer(Mask::xyzw, gen.mat2, vu.vi13 + 2); - // lq.xyzw vf11, 3(vi13) | ftoi12.z vf22, vf22 121 - // fmt::print("a: [{}] [{}]\n", vu.vf22.print(), vu.vf23.print()); - vu.vf22.ftoi12_check(Mask::z, vu.vf22); lq_buffer(Mask::xyzw, gen.mat3, vu.vi13 + 3); - // iadd vi02, vi01, vi01 | ftoi12.z vf23, vf23 122 - vu.vf23.ftoi12_check(Mask::z, vu.vf23); vu.vi02 = vu.vi01 + vu.vi01; - // iadd vi01, vi01, vi02 | sub.xyzw vf16, vf16, vf16 123 - gen.vtx_load0.set_zero(); vu.vi01 = vu.vi01 + vu.vi02; - // iaddi vi11, vi00, -0x2 | sub.xyzw vf17, vf17, vf17 124 - gen.vtx_load1.set_zero(); vu.vi11 = -2; - // lq.xy vf22, 0(vi10) | nop 125 - lq_buffer(Mask::xy, vu.vf22, vu.vi10); - // lq.xyz vf16, 2(vi10) | nop 126 - lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); - // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 127 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, 
vf08, vf16 128 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 129 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); - // lq.xyz vf17, 2(vi10) | nop 130 - lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); - // iand vi06, vi02, vi11 | nop 131 - vu.vi06 = vu.vi02 & vu.vi11; - // mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 132 - vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vf22.mfir(Mask::x, vu.vi06); - // iadd vi14, vi10, vi01 | ftoi12.z vf24, vf24 133 - // fmt::print("b: [{}] [{}]\n", vu.vf24.print(), vu.vf25.print()); - vu.vf24.ftoi12_check(Mask::z, vu.vf24); vu.vi14 = vu.vi10 + vu.vi01; - // isw.w vi12, 906(vi00) | ftoi12.z vf25, vf25 134 - vu.vf25.ftoi12_check(Mask::z, vu.vf25); isw_buffer(Mask::w, vu.vi12, 906); - // nop | nop 135 - - // div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 136 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / gen.vtx_p0.w(); - // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 137 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 138 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 139 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); - // lq.xyz vf16, 2(vi10) | nop 140 - lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); - // iand vi07, vi03, vi11 | nop 141 - vu.vi07 = vu.vi03 & vu.vi11; - // mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 142 - vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load1.z()); vu.vf23.mfir(Mask::x, vu.vi07); - // nop | mul.xyz vf12, vf12, Q 143 - gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); - // nop | mul.xyz vf18, vf18, Q 144 - vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); - // nop | nop 145 - - // div Q, vf01.x, vf13.w | 
itof12.xyz vf19, vf23 146 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / gen.vtx_p1.w(); - // nop | add.xyzw vf12, vf12, vf04 147 - gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); - // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 148 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 149 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 150 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); - // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 151 - gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); - // iand vi08, vi04, vi11 | nop 152 - vu.vi08 = vu.vi04 & vu.vi11; - // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 153 - vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vf24.mfir(Mask::x, vu.vi08); - // nop | mul.xyz vf13, vf13, Q 154 - gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); - // nop | mul.xyz vf19, vf19, Q 155 - vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); - // iaddi vi14, vi14, 0x9 | maxy.w vf12, vf12, vf01 156 - gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); vu.vi14 = vu.vi14 + 9; - // fmt::print("vf12-1a: [{}]\n", gen.vtx_p0.print()); - -L10: // R - // fmt::print("vf12-1b: [{}]\n", gen.vtx_p0.print()); - - // div Q, vf01.x, vf14.w | itof12.xyz vf20, vf24 157 - vu.vf20.itof12(Mask::xyz, vu.vf24); vu.Q = gen.fog.x() / gen.vtx_p2.w(); - // BRANCH! 
- // ibeq vi02, vi06, L11 | add.xyzw vf13, vf13, vf04 158 - gen.vtx_p1.add(Mask::xyzw, gen.vtx_p1, gen.hvdf_off); bc = (vu.vi02 == vu.vi06); - // mtir vi05, vf25.x | mulaw.xyzw ACC, vf11, vf00 159 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi05 = vu.vf25.x_as_u16(); - if (bc) { goto L11; } - - // nop | addw.w vf12, vf12, vf01 160 - gen.vtx_p0.add(Mask::w, gen.vtx_p0, kFogFloatOffset); - L11: // R - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 161 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf22, 0(vi10) | madday.xyzw ACC, vf09, vf17 162 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf22, vu.vi10); - // lq.xyz vf16, 2(vi10) | miniz.w vf13, vf13, vf01 163 - gen.vtx_p1.mini(Mask::w, gen.vtx_p1, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); - // fmt::print("vf16 vertex [{}] @ \n", gen.vtx_load0.print(), vu.vi10 + 2); - // iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 164 - gen.vtx_p0.ftoi4_check(Mask::xyzw, gen.vtx_p0); vu.vi09 = vu.vi05 & vu.vi11; - // mfir.x vf25, vi09 | maddz.xyzw vf15, vf10, vf17 165 - vu.acc.madd(Mask::xyzw, gen.vtx_p3, gen.mat2, gen.vtx_load1.z()); vu.vf25.mfir(Mask::x, vu.vi09); - // sq.xyzw vf18, -12(vi10) | mul.xyz vf14, vf14, Q 166 - gen.vtx_p2.mul(Mask::xyz, gen.vtx_p2, vu.Q); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L15 | mul.xyz vf20, vf20, Q 167 - vu.vf20.mul(Mask::xyz, vu.vf20, vu.Q); bc = (vu.vi14 == vu.vi10); - // fmt::print("store: {} {}\n", vu.vi10 - 10, gen.vtx_p0.print_hex()); - // sq.xyzw vf12, -10(vi10) | maxy.w vf13, vf13, vf01 168 - gen.vtx_p1.max(Mask::w, gen.vtx_p1, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + -10); - if (bc) { goto L15; } - - // div Q, vf01.x, vf15.w | itof12.xyz vf21, vf25 169 - vu.vf21.itof12(Mask::xyz, vu.vf25); vu.Q = gen.fog.x() / gen.vtx_p3.w(); - // BRANCH! 
- // ibeq vi03, vi07, L12 | add.xyzw vf14, vf14, vf04 170 - gen.vtx_p2.add(Mask::xyzw, gen.vtx_p2, gen.hvdf_off); bc = (vu.vi03 == vu.vi07); - // mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 171 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi02 = vu.vf22.x_as_u16(); - if (bc) { goto L12; } - - // nop | addw.w vf13, vf13, vf01 172 - gen.vtx_p1.add(Mask::w, gen.vtx_p1, kFogFloatOffset); - L12: // R - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 173 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 174 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf23, vu.vi10); - // lq.xyz vf17, 2(vi10) | miniz.w vf14, vf14, vf01 175 - gen.vtx_p2.mini(Mask::w, gen.vtx_p2, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); - // iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 176 - gen.vtx_p1.ftoi4_check(Mask::xyzw, gen.vtx_p1); vu.vi06 = vu.vi02 & vu.vi11; - // mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 177 - vu.acc.madd(Mask::xyzw, gen.vtx_p0, gen.mat2, gen.vtx_load0.z()); vu.vf22.mfir(Mask::x, vu.vi06); - // fmt::print("vf12 transformed: [{}]\n", gen.vtx_p0.print()); - // sq.xyzw vf19, -12(vi10) | mul.xyz vf15, vf15, Q 178 - gen.vtx_p3.mul(Mask::xyz, gen.vtx_p3, vu.Q); sq_buffer(Mask::xyzw, vu.vf19, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L15 | mul.xyz vf21, vf21, Q 179 - vu.vf21.mul(Mask::xyz, vu.vf21, vu.Q); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf13, -10(vi10) | maxy.w vf14, vf14, vf01 180 - gen.vtx_p2.max(Mask::w, gen.vtx_p2, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p1, vu.vi10 + -10); - if (bc) { goto L15; } - - // div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 181 - vu.vf18.itof12(Mask::xyz, vu.vf22); vu.Q = gen.fog.x() / gen.vtx_p0.w(); - // BRANCH! 
- // ibeq vi04, vi08, L13 | add.xyzw vf15, vf15, vf04 182 - gen.vtx_p3.add(Mask::xyzw, gen.vtx_p3, gen.hvdf_off); bc = (vu.vi04 == vu.vi08); - // mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 183 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi03 = vu.vf23.x_as_u16(); - if (bc) { goto L13; } - - // nop | addw.w vf14, vf14, vf01 184 - gen.vtx_p2.add(Mask::w, gen.vtx_p2, kFogFloatOffset); - L13: // R - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 185 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load1.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 186 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load1.y()); lq_buffer(Mask::xy, vu.vf24, vu.vi10); - // lq.xyz vf16, 2(vi10) | miniz.w vf15, vf15, vf01 187 - gen.vtx_p3.mini(Mask::w, gen.vtx_p3, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); - // iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 188 - gen.vtx_p2.ftoi4_check(Mask::xyzw, gen.vtx_p2); vu.vi07 = vu.vi03 & vu.vi11; - // mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 189 - vu.acc.madd(Mask::xyzw, gen.vtx_p1, gen.mat2, gen.vtx_load1.z()); vu.vf23.mfir(Mask::x, vu.vi07); - // sq.xyzw vf20, -12(vi10) | mul.xyz vf12, vf12, Q 190 - gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); sq_buffer(Mask::xyzw, vu.vf20, vu.vi10 + -12); - // BRANCH! - // ibeq vi14, vi10, L15 | mul.xyz vf18, vf18, Q 191 - vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); bc = (vu.vi14 == vu.vi10); - // sq.xyzw vf14, -10(vi10) | maxy.w vf15, vf15, vf01 192 - gen.vtx_p3.max(Mask::w, gen.vtx_p3, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p2, vu.vi10 + -10); - if (bc) { goto L15; } - - // div Q, vf01.x, vf13.w | itof12.xyz vf19, vf23 193 - vu.vf19.itof12(Mask::xyz, vu.vf23); vu.Q = gen.fog.x() / gen.vtx_p1.w(); - // BRANCH! 
- // ibeq vi05, vi09, L14 | add.xyzw vf12, vf12, vf04 194 - gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); bc = (vu.vi05 == vu.vi09); - // mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 195 - vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); vu.vi04 = vu.vf24.x_as_u16(); - if (bc) { goto L14; } - - // nop | addw.w vf15, vf15, vf01 196 - gen.vtx_p3.add(Mask::w, gen.vtx_p3, kFogFloatOffset); - L14: // R - // iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 197 - vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); vu.vi10 = vu.vi10 + 3; - // lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 198 - vu.acc.madda(Mask::xyzw, gen.mat1, gen.vtx_load0.y()); lq_buffer(Mask::xy, vu.vf25, vu.vi10); - // lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 199 - gen.vtx_p0.mini(Mask::w, gen.vtx_p0, gen.fog.z()); lq_buffer(Mask::xyz, gen.vtx_load1, vu.vi10 + 2); - // iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 200 - gen.vtx_p3.ftoi4_check(Mask::xyzw, gen.vtx_p3); vu.vi08 = vu.vi04 & vu.vi11; - // mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 201 - vu.acc.madd(Mask::xyzw, gen.vtx_p2, gen.mat2, gen.vtx_load0.z()); vu.vf24.mfir(Mask::x, vu.vi08); - // sq.xyzw vf21, -12(vi10) | mul.xyz vf13, vf13, Q 202 - gen.vtx_p1.mul(Mask::xyz, gen.vtx_p1, vu.Q); sq_buffer(Mask::xyzw, vu.vf21, vu.vi10 + -12); - // BRANCH! - // ibne vi14, vi10, L10 | mul.xyz vf19, vf19, Q 203 - vu.vf19.mul(Mask::xyz, vu.vf19, vu.Q); bc = (vu.vi14 != vu.vi10); - // sq.xyzw vf15, -10(vi10) | maxy.w vf12, vf12, vf01 204 - // fmt::print("reloop {} {}\n", vu.vi14, vu.vi10); - gen.vtx_p0.max(Mask::w, gen.vtx_p0, gen.fog.y()); sq_buffer(Mask::xyzw, gen.vtx_p3, vu.vi10 + -10); - if (bc) { goto L10; } - - L15: // R - // BRANCH! 
- // b L82 | nop 205 - bc = true; - // ilw.w vi12, 906(vi00) | nop 206 - ilw_buffer(Mask::w, vu.vi12, 906); - if (bc) { goto L82; } - L33: // R // isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 360 vu.vf22.add(Mask::z, vu.vf00, vu.vf00.w()); vu.vi02 = vu.vi13 - 0x363; /* 867 */ diff --git a/game/graphics/opengl_renderer/GenericRenderer.h b/game/graphics/opengl_renderer/GenericRenderer.h index f1ddd8475a..2c878e15e4 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.h +++ b/game/graphics/opengl_renderer/GenericRenderer.h @@ -21,6 +21,7 @@ class GenericRenderer : public BucketRenderer { void mscal(int imm, SharedRenderState* render_state, ScopedProfilerNode& prof); void mscal0(); void mscal_dispatch(int imm, SharedRenderState* render_state, ScopedProfilerNode& prof); + void mscal_noclip_nopipe(SharedRenderState* render_state, ScopedProfilerNode& prof); void handle_dma_stream(const u8* data, u32 bytes, SharedRenderState* render_state, diff --git a/goal_src/engine/anim/bones.gc b/goal_src/engine/anim/bones.gc index 18f1dd576f..714f6e1aad 100644 --- a/goal_src/engine/anim/bones.gc +++ b/goal_src/engine/anim/bones.gc @@ -1147,6 +1147,7 @@ ) ) ;; final two cases are for all other effects that don't matter generic/normal + ;; here is a good place to force mercneric. ((nonzero? (-> *merc-bucket-info* must-use-mercneric-for-clip)) (set! (-> *merc-bucket-info* effect effect-idx use-mercneric) (the-as uint 1)) (set! 
used-mercneric 1) From 4e6c3049753425da6bd266d99ac7c9cd1c9bf1bd Mon Sep 17 00:00:00 2001 From: water Date: Sat, 5 Mar 2022 17:18:13 -0500 Subject: [PATCH 04/12] generic2 dma --- common/dma/gs.h | 2 + game/CMakeLists.txt | 10 +- .../opengl_renderer/GenericProgram.cpp | 8 + .../opengl_renderer/GenericRenderer.cpp | 12 +- .../opengl_renderer/GenericRenderer.h | 2 + .../opengl_renderer/OpenGLRenderer.cpp | 4 +- game/graphics/opengl_renderer/SkyRenderer.h | 2 +- game/graphics/opengl_renderer/Sprite3.cpp | 2 +- game/graphics/opengl_renderer/Sprite3.h | 2 +- .../opengl_renderer/SpriteRenderer.cpp | 2 +- .../{tfrag => background}/TFragment.cpp | 0 .../{tfrag => background}/TFragment.h | 3 +- .../{tfrag => background}/Tfrag3.cpp | 0 .../{tfrag => background}/Tfrag3.h | 3 +- .../{tfrag => background}/Tie3.cpp | 0 .../{tfrag => background}/Tie3.h | 2 +- .../background_common.cpp} | 2 +- .../background_common.h} | 0 .../opengl_renderer/foreground/Generic2.cpp | 63 +++ .../opengl_renderer/foreground/Generic2.h | 105 +++++ .../foreground/Generic2_DMA.cpp | 375 ++++++++++++++++++ 21 files changed, 583 insertions(+), 16 deletions(-) rename game/graphics/opengl_renderer/{tfrag => background}/TFragment.cpp (100%) rename game/graphics/opengl_renderer/{tfrag => background}/TFragment.h (95%) rename game/graphics/opengl_renderer/{tfrag => background}/Tfrag3.cpp (100%) rename game/graphics/opengl_renderer/{tfrag => background}/Tfrag3.h (97%) rename game/graphics/opengl_renderer/{tfrag => background}/Tie3.cpp (100%) rename game/graphics/opengl_renderer/{tfrag => background}/Tie3.h (98%) rename game/graphics/opengl_renderer/{tfrag/tfrag_common.cpp => background/background_common.cpp} (99%) rename game/graphics/opengl_renderer/{tfrag/tfrag_common.h => background/background_common.h} (100%) create mode 100644 game/graphics/opengl_renderer/foreground/Generic2.cpp create mode 100644 game/graphics/opengl_renderer/foreground/Generic2.h create mode 100644 
game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp diff --git a/common/dma/gs.h b/common/dma/gs.h index 08a0375b6d..67b181bd8f 100644 --- a/common/dma/gs.h +++ b/common/dma/gs.h @@ -354,6 +354,8 @@ struct AdGifData { u64 alpha_addr; }; +static_assert(sizeof(AdGifData) == 5 * 16); + // this represents all of the drawing state, stored as an integer. // it can also represent "invalid". class DrawMode { diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index d153db8f38..7c446a4e70 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -74,6 +74,12 @@ set(RUNTIME_SOURCE graphics/gfx.cpp graphics/display.cpp graphics/sceGraphicsInterface.cpp + graphics/opengl_renderer/background/background_common.cpp + graphics/opengl_renderer/background/Tfrag3.cpp + graphics/opengl_renderer/background/TFragment.cpp + graphics/opengl_renderer/background/Tie3.cpp + graphics/opengl_renderer/foreground/Generic2.cpp + graphics/opengl_renderer/foreground/Generic2_DMA.cpp graphics/opengl_renderer/BucketRenderer.cpp graphics/opengl_renderer/debug_gui.cpp graphics/opengl_renderer/DirectRenderer.cpp @@ -95,10 +101,6 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/Sprite3.cpp graphics/opengl_renderer/SpriteRenderer.cpp graphics/opengl_renderer/TextureUploadHandler.cpp - graphics/opengl_renderer/tfrag/Tfrag3.cpp - graphics/opengl_renderer/tfrag/tfrag_common.cpp - graphics/opengl_renderer/tfrag/TFragment.cpp - graphics/opengl_renderer/tfrag/Tie3.cpp graphics/texture/TextureConverter.cpp graphics/texture/TexturePool.cpp graphics/pipelines/opengl.cpp diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index bb1d31daec..582a980565 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -163,6 +163,8 @@ void GenericRenderer::mscal_noclip_nopipe(SharedRenderState *render_state, Scope if (vu.vi02 == 0) { vu.vi13 = 0x345; /* 837 */ } + + vu.vi03 = vu.vi13 + 7; 
ilw_buffer(Mask::w, vu.vi01, vu.vi13 + 5); isw_buffer(Mask::x, vu.vi03, 906); @@ -240,8 +242,14 @@ void GenericRenderer::mscal_noclip_nopipe(SharedRenderState *render_state, Scope // store! sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + 2); + vu.vf18.x() /= vu.vf18.z(); + vu.vf18.y() /= vu.vf18.z(); + vu.vf18.z() = 1.f; + + // fmt::print("tex.z = {}\n", vu.vf18.z()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10); + // iaddi vi10, vi10, 0x3 inc vertex pointer vu.vi10 = vu.vi10 + 3; } diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index 8b687e6280..3efe1a8684 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -4,7 +4,8 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), m_direct(name, my_id, 0x30000), - m_direct2(30000, 60000, 1000, name, true) {} + m_direct2(30000, 60000, 1000, name, true), + m_debug_gen2(name, my_id, 50000, 1000, 1000) {} void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); @@ -32,6 +33,9 @@ void GenericRenderer::render(DmaFollower& dma, return; } + // todo remove + DmaFollower gen2_follower = dma; + while (dma.current_tag_offset() != render_state->next_bucket) { auto data = dma.read_and_advance(); m_debug += fmt::format("{} : {} {}\n", data.size_bytes, data.vifcode0().print(), @@ -142,6 +146,12 @@ void GenericRenderer::render(DmaFollower& dma, } else { m_direct.flush_pending(render_state, prof); } + + { + // todo remove + auto pp = prof.make_scoped_child("gen2"); + m_debug_gen2.render(gen2_follower, render_state, pp); + } } void GenericRenderer::handle_dma_stream(const u8* data, diff --git a/game/graphics/opengl_renderer/GenericRenderer.h b/game/graphics/opengl_renderer/GenericRenderer.h index 2c878e15e4..02b67cc502 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.h +++ 
b/game/graphics/opengl_renderer/GenericRenderer.h @@ -3,6 +3,7 @@ #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer2.h" +#include "game/graphics/opengl_renderer/foreground/Generic2.h" #include "game/common/vu.h" class GenericRenderer : public BucketRenderer { @@ -43,6 +44,7 @@ class GenericRenderer : public BucketRenderer { int m_skipped_tags = 0; DirectRenderer m_direct; DirectRenderer2 m_direct2; + Generic2 m_debug_gen2; std::string m_debug; struct Vu { diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index f214635f4a..b9fa9a82c3 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -9,8 +9,8 @@ #include "common/util/FileUtil.h" #include "game/graphics/opengl_renderer/SkyRenderer.h" #include "game/graphics/opengl_renderer/Sprite3.h" -#include "game/graphics/opengl_renderer/tfrag/TFragment.h" -#include "game/graphics/opengl_renderer/tfrag/Tie3.h" +#include "game/graphics/opengl_renderer/background/TFragment.h" +#include "game/graphics/opengl_renderer/background/Tie3.h" #include "game/graphics/opengl_renderer/MercRenderer.h" #include "game/graphics/opengl_renderer/EyeRenderer.h" #include "game/graphics/opengl_renderer/GenericRenderer.h" diff --git a/game/graphics/opengl_renderer/SkyRenderer.h b/game/graphics/opengl_renderer/SkyRenderer.h index 4b7542e6a2..019518413f 100644 --- a/game/graphics/opengl_renderer/SkyRenderer.h +++ b/game/graphics/opengl_renderer/SkyRenderer.h @@ -2,7 +2,7 @@ #pragma once #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer.h" -#include "game/graphics/opengl_renderer/tfrag/TFragment.h" +#include "game/graphics/opengl_renderer/background/TFragment.h" #include "game/graphics//opengl_renderer/SkyBlendGPU.h" #include 
"game/graphics//opengl_renderer/SkyBlendCPU.h" diff --git a/game/graphics/opengl_renderer/Sprite3.cpp b/game/graphics/opengl_renderer/Sprite3.cpp index a589629cd6..04a0e35766 100644 --- a/game/graphics/opengl_renderer/Sprite3.cpp +++ b/game/graphics/opengl_renderer/Sprite3.cpp @@ -5,7 +5,7 @@ #include "third-party/fmt/core.h" #include "third-party/imgui/imgui.h" #include "game/graphics/opengl_renderer/dma_helpers.h" -#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/background/background_common.h" namespace { diff --git a/game/graphics/opengl_renderer/Sprite3.h b/game/graphics/opengl_renderer/Sprite3.h index 86fd083fca..3ebc43d519 100644 --- a/game/graphics/opengl_renderer/Sprite3.h +++ b/game/graphics/opengl_renderer/Sprite3.h @@ -6,7 +6,7 @@ #include "common/dma/gs.h" #include "common/math/Vector.h" #include "game/graphics/opengl_renderer/sprite_common.h" -#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/background/background_common.h" #include diff --git a/game/graphics/opengl_renderer/SpriteRenderer.cpp b/game/graphics/opengl_renderer/SpriteRenderer.cpp index 6b24414c16..73a15ecfd3 100644 --- a/game/graphics/opengl_renderer/SpriteRenderer.cpp +++ b/game/graphics/opengl_renderer/SpriteRenderer.cpp @@ -2,7 +2,7 @@ #include "third-party/imgui/imgui.h" #include "SpriteRenderer.h" #include "game/graphics/opengl_renderer/dma_helpers.h" -#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/background/background_common.h" namespace { diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.cpp b/game/graphics/opengl_renderer/background/TFragment.cpp similarity index 100% rename from game/graphics/opengl_renderer/tfrag/TFragment.cpp rename to game/graphics/opengl_renderer/background/TFragment.cpp diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.h b/game/graphics/opengl_renderer/background/TFragment.h 
similarity index 95% rename from game/graphics/opengl_renderer/tfrag/TFragment.h rename to game/graphics/opengl_renderer/background/TFragment.h index a3120f90b7..b684ec61d2 100644 --- a/game/graphics/opengl_renderer/tfrag/TFragment.h +++ b/game/graphics/opengl_renderer/background/TFragment.h @@ -2,7 +2,8 @@ #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer.h" -#include "game/graphics/opengl_renderer/tfrag/Tfrag3.h" +#include "game/graphics/opengl_renderer/background/Tfrag3.h" +#include "game/graphics/opengl_renderer/background/Tie3.h" #include "common/dma/gs.h" #include "common/math/Vector.h" diff --git a/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp b/game/graphics/opengl_renderer/background/Tfrag3.cpp similarity index 100% rename from game/graphics/opengl_renderer/tfrag/Tfrag3.cpp rename to game/graphics/opengl_renderer/background/Tfrag3.cpp diff --git a/game/graphics/opengl_renderer/tfrag/Tfrag3.h b/game/graphics/opengl_renderer/background/Tfrag3.h similarity index 97% rename from game/graphics/opengl_renderer/tfrag/Tfrag3.h rename to game/graphics/opengl_renderer/background/Tfrag3.h index bbfe6f656a..5e7f282b55 100644 --- a/game/graphics/opengl_renderer/tfrag/Tfrag3.h +++ b/game/graphics/opengl_renderer/background/Tfrag3.h @@ -5,8 +5,7 @@ #include "game/graphics/gfx.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/pipelines/opengl.h" -#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" -#include "game/graphics/opengl_renderer/tfrag/Tie3.h" +#include "game/graphics/opengl_renderer/background/background_common.h" class Tfrag3 { public: diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.cpp b/game/graphics/opengl_renderer/background/Tie3.cpp similarity index 100% rename from game/graphics/opengl_renderer/tfrag/Tie3.cpp rename to game/graphics/opengl_renderer/background/Tie3.cpp diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.h 
b/game/graphics/opengl_renderer/background/Tie3.h similarity index 98% rename from game/graphics/opengl_renderer/tfrag/Tie3.h rename to game/graphics/opengl_renderer/background/Tie3.h index 1305eb7ca0..2478c25420 100644 --- a/game/graphics/opengl_renderer/tfrag/Tie3.h +++ b/game/graphics/opengl_renderer/background/Tie3.h @@ -3,7 +3,7 @@ #include #include "game/graphics/gfx.h" -#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/background/background_common.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/pipelines/opengl.h" #include "common/util/FilteredValue.h" diff --git a/game/graphics/opengl_renderer/tfrag/tfrag_common.cpp b/game/graphics/opengl_renderer/background/background_common.cpp similarity index 99% rename from game/graphics/opengl_renderer/tfrag/tfrag_common.cpp rename to game/graphics/opengl_renderer/background/background_common.cpp index 4354da4c5c..92027c5734 100644 --- a/game/graphics/opengl_renderer/tfrag/tfrag_common.cpp +++ b/game/graphics/opengl_renderer/background/background_common.cpp @@ -1,6 +1,6 @@ -#include "tfrag_common.h" +#include "background_common.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/pipelines/opengl.h" #include "common/util/os.h" diff --git a/game/graphics/opengl_renderer/tfrag/tfrag_common.h b/game/graphics/opengl_renderer/background/background_common.h similarity index 100% rename from game/graphics/opengl_renderer/tfrag/tfrag_common.h rename to game/graphics/opengl_renderer/background/background_common.h diff --git a/game/graphics/opengl_renderer/foreground/Generic2.cpp b/game/graphics/opengl_renderer/foreground/Generic2.cpp new file mode 100644 index 0000000000..9cc89ed29b --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Generic2.cpp @@ -0,0 +1,63 @@ +#include "Generic2.h" + +Generic2::Generic2(const std::string& name, + BucketId my_id, + u32 num_verts, + u32 num_frags, + u32 
num_adgif) + : BucketRenderer(name, my_id) { + m_verts.resize(num_verts); + m_fragments.resize(num_frags); + m_adgifs.resize(num_adgif); +} + +void Generic2::draw_debug_window() {} + +/*! + * Main render function for Generic2. This will be passed a DMA "follower" from the main + * OpenGLRenderer that can read a DMA chain, starting at the DMA "bucket" that was filled by the + * generic renderer. This renderer is expected to follow the chain until it reaches "next_bucket" + * and then return. + */ +void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { + // completely clear out state. These will get populated by the rendering functions, then displayed + // by draw_debug_window() if the user opens that window + m_debug.clear(); + m_stats = Stats(); + + // if the user has asked to disable the renderer, just advance the dma follower to the next + // bucket and return immediately. + if (!m_enabled) { + while (dma.current_tag_offset() != render_state->next_bucket) { + dma.read_and_advance(); + } + return; + } + + // Generic2 has 3 passes. + { + // our first pass is to go over the DMA chain from the game and extract the data into buffers + // Timer proc_dma_timer; + auto p = prof.make_scoped_child("dma"); + process_dma(dma, render_state->next_bucket); + // fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); + } + + { + // the next pass is to look at all of that data, and figure out the best order to draw it + // using OpenGL + auto p = prof.make_scoped_child("setup"); + setup_draws(); + } + + { + // the final pass is the actual drawing. 
+    auto p = prof.make_scoped_child("drawing");
+    do_draws();
+  }
+}
+
+
+void Generic2::setup_draws() {}
+
+void Generic2::do_draws() {}
\ No newline at end of file
diff --git a/game/graphics/opengl_renderer/foreground/Generic2.h b/game/graphics/opengl_renderer/foreground/Generic2.h
new file mode 100644
index 0000000000..672df4f546
--- /dev/null
+++ b/game/graphics/opengl_renderer/foreground/Generic2.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include "game/graphics/opengl_renderer/BucketRenderer.h"
+
+class Generic2 : public BucketRenderer {
+ public:
+  Generic2(const std::string& name, BucketId my_id, u32 num_verts, u32 num_frags, u32 num_adgif);
+  void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override;
+  void draw_debug_window() override;
+  // void init_shaders(ShaderLibrary& shaders) override;
+
+  struct Vertex {
+    math::Vector<float, 3> xyz;  // 3 floats (12 bytes), see unpack_vtx_positions
+    math::Vector<u8, 4> rgba;    // 4 bytes, see unpack_vertex_colors
+    math::Vector<float, 2> st;   // 16
+    u8 tex_unit;
+    u8 flags;
+    u8 fog;
+    u8 pad;
+    u32 pad2;
+  };
+  static_assert(sizeof(Vertex) == 32);
+
+ private:
+  void reset_buffers();
+  void process_dma(DmaFollower& dma, u32 next_bucket);
+  void setup_draws();
+  void do_draws();
+  bool check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket);
+
+  bool handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket);
+
+  struct GenericDraw {
+    u32 first_vert = -1;
+    u32 verts = -1;
+    u16 mscal = 0;
+  };
+
+  struct {
+    u32 stcycl;
+  } m_dma_unpack;
+
+  struct DrawingConfig {
+    bool zmsk = false;
+    // horizontal, vertical, depth, fog offsets.
+    math::Vector4f hvdf_offset;
+    float pfog0;             // scale factor for perspective divide
+    float fog_min, fog_max;  // clamp for fog
+
+  } m_drawing_config;
+
+  static constexpr u32 FRAG_HEADER_SIZE = 16 * 7;  // 7 quadwords of 16 bytes
+  struct Fragment {
+    u8 header[FRAG_HEADER_SIZE];  // raw copy of the uploaded header
+    u32 adgif_idx = 0;            // first entry in m_adgifs
+    u32 adgif_count = 0;
+
+    u32 vtx_idx = 0;  // first entry in m_verts
+    u32 vtx_count = 0;
+    u8 mscal_addr = 0;  // immediate of the MSCAL vifcode
+  };
+  u32 handle_fragments_after_unpack_v4_32(const u8* data,
+                                          u32 off,
+                                          u32 first_unpack_bytes,
+                                          u32 next_bucket,
+                                          u32 end_of_vif,
+                                          Fragment* frag, bool loop);
+
+
+
+  u32 m_next_free_frag = 0;
+  std::vector<Fragment> m_fragments;
+
+  u32 m_next_free_vert = 0;
+  std::vector<Vertex> m_verts;
+
+  struct Adgif {
+    AdGifData data;
+    u32 ee_mem_addr;
+  };
+
+  u32 m_next_free_adgif = 0;
+  std::vector<Adgif> m_adgifs;
+
+  Fragment& next_frag() {
+    ASSERT(m_next_free_frag < m_fragments.size());
+    return m_fragments[m_next_free_frag++];
+  }
+
+  Adgif& next_adgif() {
+    ASSERT(m_next_free_adgif < m_adgifs.size());
+    return m_adgifs[m_next_free_adgif++];
+  }
+
+  void alloc_vtx(int count) {  // callers record m_next_free_vert before calling
+    m_next_free_vert += count;
+    ASSERT(m_next_free_vert < m_verts.size());
+  }
+
+  std::string m_debug;
+
+  struct Stats {
+    u32 dma_tags = 0;
+  } m_stats;
+};
diff --git a/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp
new file mode 100644
index 0000000000..be45d644c5
--- /dev/null
+++ b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp
@@ -0,0 +1,375 @@
+#include "Generic2.h"
+
+/*!
+ * Advance through DMA data that has no effect on rendering (NOP codes) and see if this is the
+ * end of the data.
+ * The DmaFollower will either point to the start of the next bucket (and the function will return
+ * true), or to the beginning of the next non-NOP DMA for this bucket.
+ */
+bool Generic2::check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket) {
+  while (dma.current_tag().qwc == 0 && dma.current_tag_vifcode0().kind == VifCode::Kind::NOP &&
+         dma.current_tag_vifcode1().kind == VifCode::Kind::NOP) {
+    // this "CALL" tag is inserted by the engine to reset the GS. It's always inserted at the end of
+    // the bucket. if we see it here, we should be able to skip over this resetting stuff (always 4
+    // tags) and then see the start of the next bucket.
+    if (dma.current_tag().kind == DmaTag::Kind::CALL) {
+      for (int i = 0; i < 4; i++) {
+        dma.read_and_advance();
+        m_stats.dma_tags++;
+      }
+      ASSERT(dma.current_tag_offset() == next_bucket);
+      return true;  // follower now sits at the start of the next bucket
+    }
+    m_stats.dma_tags++;
+    dma.read_and_advance();
+  }
+  return false;  // stopped at a tag that is not an empty NOP/NOP transfer
+}
+
+/*!
+ * Process the first DMA of a generic bucket.
+ * Return true if the generic bucket is empty and there is nothing to do.
+ *
+ * Otherwise, populates m_drawing_config which contains the common draw settings for all data being
+ * rendered in this bucket.
+ */
+bool Generic2::handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket) {
+  // if the engine didn't run the generic renderer setup function, this bucket will end here.
+  if (check_for_end_of_generic_data(dma, next_bucket)) {
+    return true;
+  }
+
+  // next, the generic setup. This reads the data generated by generic-init-buf.
+
+  // setup packet 1 is GS settings
+  auto test_and_zbuf = dma.read_and_advance();
+  ASSERT(test_and_zbuf.size_bytes == 48);  // 3 quadwords: gif tag, test, zbuf
+  // first qw is the gif tag. Can ignore.
+  // second qw is test, this is always the same, so can ignore it too.
+  // (new 'static 'gs-test
+  //      :ate #x1
+  //      :atst (gs-atest greater-equal)
+  //      :aref #x26
+  //      :afail #x1
+  //      :zte #x1
+  //      :ztst (gs-ztest greater-equal)
+  //      )
+  // third qw is zbuf:
+  // the only thing that changes is zmsk, we need to store this value for later.
+  u64 zbuf_val;
+  memcpy(&zbuf_val, test_and_zbuf.data + 32, 8);  // third quadword starts at byte 32
+  m_drawing_config.zmsk = GsZbuf(zbuf_val).zmsk();
+
+  // setup packet 2 is constants that normally go to VU1 data memory.
+  // we're not going to be super strict checking the exact details of the unpack command, it's
+  // a waste of time since we're the ones generating it anyway.
+  auto constants = dma.read_and_advance();
+  ASSERT(constants.size_bytes == 160);  // 10 quadwords, matching the field list below
+  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
+  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);
+
+  // (fog         vector :inline :offset-assert 0)
+  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
+  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
+  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);
+
+  // (adgif       gs-gif-tag :inline :offset-assert 16) ;; was qword
+  // (giftag      gs-gif-tag :inline :offset-assert 32) ;; was qword
+  // (hvdf-offset vector :inline :offset-assert 48)
+  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 48, 16);
+  // (hmge-scale  vector :inline :offset-assert 64)
+  // (invh-scale  vector :inline :offset-assert 80)
+  // (guard       vector :inline :offset-assert 96)
+  // (adnop       qword :inline :offset-assert 112)
+  // (flush       qword :inline :offset-assert 128)
+  // (stores      qword :inline :offset-assert 144)
+
+  auto vu_setup = dma.read_and_advance();
+  ASSERT(vu_setup.size_bytes == 32);
+  // this sets offset/base to 0, sets row to 0 and runs program 0 to set up VU regs
+  // todo: any setup required from running this program.
+
+  // if there was nothing rendered by generic on this frame in this bucket, the bucket will end
+  // here.
+  if (check_for_end_of_generic_data(dma, next_bucket)) {
+    return true;
+  }
+
+  return false;
+}
+
+void Generic2::reset_buffers() {
+  m_next_free_frag = 0;
+  m_next_free_vert = 0;
+  m_next_free_adgif = 0;
+}
+
+bool is_nop_vif(const u8* data) {
+  u32 tag0_data;
+  memcpy(&tag0_data, data, 4);  // reads 4 bytes: caller must bounds-check first
+  return VifCode(tag0_data).kind == VifCode::Kind::NOP;
+}
+
+bool is_nop_or_flushe_vif(const u8* data) {
+  u32 tag0_data;
+  memcpy(&tag0_data, data, 4);  // reads 4 bytes: caller must bounds-check first
+  auto k = VifCode(tag0_data).kind;
+  return k == VifCode::Kind::NOP || k == VifCode::Kind::FLUSHE;
+}
+
+u32 unpack_vtx_positions(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
+  for (int i = 0; i < vtx_count; i++) {
+    memcpy(vtx[i].xyz.data(), data + (i * 12), 12);
+  }
+  return vtx_count * 12;  // bytes consumed
+}
+
+u32 unpack_vertex_colors(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
+  for (int i = 0; i < vtx_count; i++) {
+    memcpy(vtx[i].rgba.data(), data + (i * 4), 4);
+  }
+  return vtx_count * 4;  // bytes consumed
+}
+
+u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
+  for (int i = 0; i < vtx_count; i++) {
+    s16 s, t;
+    memcpy(&s, data + (i * 4), 2);
+    memcpy(&t, data + (i * 4) + 2, 2);
+    // note: int to float happening here.
+    // if this is a bottleneck, we can possibly keep integers and do this in the shader.
+    // I've avoided this for now because only some integer formats are efficient on the GPU
+    vtx[i].st[0] = s;
+    vtx[i].st[1] = t;
+  }
+  return vtx_count * 4;  // bytes consumed
+}
+
+u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data,
+                                                  u32 off,
+                                                  u32 first_unpack_bytes,
+                                                  u32 next_bucket,
+                                                  u32 end_of_vif,
+                                                  Fragment* frag,
+                                                  bool loop) {
+  // each header should have 7 qw header + at least 5 qw for a single adgif.
+  ASSERT(first_unpack_bytes >= FRAG_HEADER_SIZE + sizeof(AdGifData));
+  // grab the 7 qw header
+  memcpy(frag->header, data + off, FRAG_HEADER_SIZE);
+
+  // figure out how many adgifs and grab those.
+  u32 adgif_bytes = (first_unpack_bytes - FRAG_HEADER_SIZE);
+  u32 adgifs = adgif_bytes / sizeof(AdGifData);
+  frag->adgif_idx = m_next_free_adgif;
+  frag->adgif_count = adgifs;
+  ASSERT(frag->adgif_count > 0);
+  ASSERT(adgif_bytes == adgifs * sizeof(AdGifData));
+  for (u32 i = 0; i < adgifs; i++) {
+    memcpy(&next_adgif().data, data + off + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)),
+           sizeof(AdGifData));
+  }
+
+  // continue in this transfer
+  off += first_unpack_bytes;
+  if (off == end_of_vif) {
+    fmt::print("nothing after header upload\n");
+    ASSERT(false);
+  }
+
+  // the next thing is the vertex positions.
+  while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+    off += 4;
+  }
+  u32 stcycl_tag_data;
+  memcpy(&stcycl_tag_data, data + off, 4);
+  off += 4;
+  VifCode stcycl_tag(stcycl_tag_data);
+  ASSERT(stcycl_tag.kind == VifCode::Kind::STCYCL);
+  ASSERT(stcycl_tag.immediate == 0x103);
+
+  u32 vtx_pos_unpack_tag_data;
+  memcpy(&vtx_pos_unpack_tag_data, data + off, 4);  // peek only: off advances in the else branch
+  VifCode vtx_pos_unpack_tag(vtx_pos_unpack_tag_data);
+
+  if (vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V4_8) {
+    ASSERT(loop);
+  } else {
+    ASSERT(!loop);
+    ASSERT(vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V3_32);
+    off += 4;
+
+    frag->vtx_idx = m_next_free_vert;
+    frag->vtx_count = vtx_pos_unpack_tag.num;
+    alloc_vtx(frag->vtx_count);
+
+    off += unpack_vtx_positions(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
+
+    ASSERT(off < end_of_vif);
+    while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+      off += 4;
+    }
+    ASSERT(off < end_of_vif);
+  }
+
+  if (loop) {
+    // next, vertex colors
+    u32 unpack_vtx_color_tag_data;
+    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
+    off += 4;
+    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
+    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
+    frag->vtx_idx = m_next_free_vert;  // no position upload seen yet, so allocate here
+    frag->vtx_count = unpack_vtx_color_tag.num;
+    alloc_vtx(frag->vtx_count);
+    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
+  } else {
+    // next, vertex colors
+    u32 unpack_vtx_color_tag_data;
+    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
+    off += 4;
+    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
+    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
+    ASSERT(unpack_vtx_color_tag.num == frag->vtx_count);
+    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
+  }
+
+  ASSERT(off < end_of_vif);
+  while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+    off += 4;
+  }
+  ASSERT(off < end_of_vif);
+
+  // next, vertex tcs
+  u32 unpack_vtx_tc_tag_data;
+  memcpy(&unpack_vtx_tc_tag_data, data + off, 4);
+  off += 4;
+  VifCode unpack_vtx_tc_tag(unpack_vtx_tc_tag_data);
+  ASSERT(unpack_vtx_tc_tag.kind == VifCode::Kind::UNPACK_V2_16);
+  ASSERT(unpack_vtx_tc_tag.num == frag->vtx_count);
+  off += unpack_vtx_tcs(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
+
+  if (off == end_of_vif) {
+    return off;  // transfer ends right after the texture coordinates
+  }
+
+  ASSERT(off < end_of_vif);
+  while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+    off += 4;
+  }
+  ASSERT(off < end_of_vif);
+
+  u32 stcycl_reset_data;
+  memcpy(&stcycl_reset_data, data + off, 4);
+  off += 4;
+  VifCode stcycl_reset(stcycl_reset_data);
+  if (stcycl_reset.kind == VifCode::Kind::STCYCL) {  // STCYCL first, then MSCAL
+    ASSERT(off < end_of_vif);
+    while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+      off += 4;
+    }
+    ASSERT(off < end_of_vif);
+
+    u32 mscal_data;
+    memcpy(&mscal_data, data + off, 4);
+    off += 4;
+    VifCode mscal(mscal_data);
+    ASSERT(mscal.kind == VifCode::Kind::MSCAL);
+    frag->mscal_addr = mscal.immediate;
+  } else {  // MSCAL first, then STCYCL
+    ASSERT(stcycl_reset.kind == VifCode::Kind::MSCAL);
+    frag->mscal_addr = stcycl_reset.immediate;
+
+    ASSERT(off < end_of_vif);
+    while (off < end_of_vif && is_nop_vif(data + off)) {  // bounds check before reading
+      off += 4;
+    }
+    ASSERT(off < end_of_vif);
+
+    u32 stcycl_data;
+    memcpy(&stcycl_data, data + off, 4);
+    off += 4;
+    VifCode stcycl(stcycl_data);
+    ASSERT(stcycl.kind == VifCode::Kind::STCYCL);
+  }
+
+  ASSERT(off < end_of_vif);
+  while (off < end_of_vif && is_nop_or_flushe_vif(data + off)) {  // may stop at end_of_vif
+    off += 4;
+  }
+  return off;  // offset of the first vifcode that was not consumed
+}
+
+void Generic2::process_dma(DmaFollower& dma, u32 next_bucket) {
+  reset_buffers();
+
+  // handle the stuff at the beginning.
+  if (handle_bucket_setup_dma(dma, next_bucket)) {
+    return;
+  }
+
+  // loop over "fragments"
+  // each "fragment" consists of a series of uploads, followed by a MSCAL VIFCODE that runs
+  // VU program that does vertex transformation and sends to the GS.
+  Fragment* continued_fragment = nullptr;
+
+  while (dma.current_tag_offset() != next_bucket) {
+    if (continued_fragment) {
+      auto continue_vif_transfer = dma.read_and_advance();
+      ASSERT(continue_vif_transfer.vifcode0().kind == VifCode::Kind::NOP);
+      auto up = continue_vif_transfer.vifcode1();
+      ASSERT(up.kind == VifCode::Kind::UNPACK_V3_32);
+      ASSERT(continue_vif_transfer.size_bytes * 4 / 48 == up.num);
+      ASSERT(up.num == continued_fragment->vtx_count);
+      unpack_vtx_positions(&m_verts[continued_fragment->vtx_idx], continue_vif_transfer.data,
+                           continued_fragment->vtx_count);
+      continued_fragment = nullptr;
+      auto call = dma.read_and_advance();
+      ASSERT(call.size_bytes == 0);
+      ASSERT(call.vifcode1().kind == VifCode::Kind::MSCAL);
+
+      if (check_for_end_of_generic_data(dma, next_bucket)) {
+        return;
+      }
+
+    } else {
+      auto vif_transfer = dma.read_and_advance();
+      auto v1 = vif_transfer.vifcode1();
+      //      if (vif_transfer.vifcode0().kind != VifCode::Kind::STCYCL ||
+      //          vif_transfer.vifcode1().kind != VifCode::Kind::UNPACK_V4_32) {
+      //        fmt::print("failing tag: {} {} {}\n", vif_transfer.vifcode0().print(),
+      //                   vif_transfer.vifcode1().print(), vif_transfer.size_bytes);
+      //      }
+      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::STCYCL);
+      ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32);
+      u32 unpack_bytes = v1.num * 16;
+      auto& frag = next_frag();
+      u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes, next_bucket,
+                                                    vif_transfer.size_bytes, &frag,
false); + + if (check_for_end_of_generic_data(dma, next_bucket)) { + return; + } + + if (off < vif_transfer.size_bytes) { + u32 stcycl_reset; + memcpy(&stcycl_reset, vif_transfer.data + off, 4); + ASSERT(VifCode(stcycl_reset).kind == VifCode::Kind::STCYCL); + off += 4; + // while (off < vif_transfer.size_bytes) { + u32 next; + memcpy(&next, vif_transfer.data + off, 4); + VifCode next_unpack(next); + + ASSERT(next_unpack.kind == VifCode::Kind::UNPACK_V4_32); + + auto& continue_frag = next_frag(); + off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16, + next_bucket, vif_transfer.size_bytes, + &continue_frag, true); + continued_fragment = &continue_frag; + ASSERT(off == vif_transfer.size_bytes); + // } + } + } + } +} \ No newline at end of file From d0cbf95d4afdb9d629159cc15980f17693732bbb Mon Sep 17 00:00:00 2001 From: water Date: Sat, 5 Mar 2022 20:30:48 -0500 Subject: [PATCH 05/12] pass2 --- common/dma/gs.h | 9 + game/CMakeLists.txt | 1 + game/graphics/opengl_renderer/AdgifHandler.h | 7 + .../opengl_renderer/GenericProgram.cpp | 2 +- .../opengl_renderer/GenericRenderer.cpp | 2 +- .../opengl_renderer/foreground/Generic2.cpp | 21 +- .../opengl_renderer/foreground/Generic2.h | 84 +++++- .../foreground/Generic2_Build.cpp | 251 ++++++++++++++++++ .../foreground/Generic2_DMA.cpp | 17 +- 9 files changed, 368 insertions(+), 26 deletions(-) create mode 100644 game/graphics/opengl_renderer/foreground/Generic2_Build.cpp diff --git a/common/dma/gs.h b/common/dma/gs.h index 67b181bd8f..f2342413ef 100644 --- a/common/dma/gs.h +++ b/common/dma/gs.h @@ -352,6 +352,15 @@ struct AdGifData { u64 clamp_addr; u64 alpha_data; u64 alpha_addr; + + bool is_normal_adgif() const { + return (u8)tex0_addr == (u32)GsRegisterAddress::TEX0_1 && + (u8)tex1_addr == (u32)GsRegisterAddress::TEX1_1 && + (u8)mip_addr == (u32)GsRegisterAddress::MIPTBP1_1 && + (u8)clamp_addr == (u32)GsRegisterAddress::CLAMP_1 && + ((u8)alpha_addr == (u32)GsRegisterAddress::ALPHA_1 || + 
(u8)alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); + } }; static_assert(sizeof(AdGifData) == 5 * 16); diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 7c446a4e70..ac8d769594 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -80,6 +80,7 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/background/Tie3.cpp graphics/opengl_renderer/foreground/Generic2.cpp graphics/opengl_renderer/foreground/Generic2_DMA.cpp + graphics/opengl_renderer/foreground/Generic2_Build.cpp graphics/opengl_renderer/BucketRenderer.cpp graphics/opengl_renderer/debug_gui.cpp graphics/opengl_renderer/DirectRenderer.cpp diff --git a/game/graphics/opengl_renderer/AdgifHandler.h b/game/graphics/opengl_renderer/AdgifHandler.h index 2b8c8d6b23..b90959df7f 100644 --- a/game/graphics/opengl_renderer/AdgifHandler.h +++ b/game/graphics/opengl_renderer/AdgifHandler.h @@ -12,6 +12,13 @@ class AdgifHelper { m_alpha = GsAlpha(m_data.alpha_data); } + explicit AdgifHelper(const AdGifData& data) : m_data(data) { + m_tex0 = GsTex0(m_data.tex0_data); + m_tex1 = GsTex1(m_data.tex1_data); + m_alpha = GsAlpha(m_data.alpha_data); + } + + bool is_normal_adgif() const { return (u8)m_data.tex0_addr == (u32)GsRegisterAddress::TEX0_1 && (u8)m_data.tex1_addr == (u32)GsRegisterAddress::TEX1_1 && diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index 582a980565..fa7d2a0ed1 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -158,7 +158,7 @@ void GenericRenderer::ilw_buffer(Mask mask, u16& dest, u16 addr) { void GenericRenderer::mscal_noclip_nopipe(SharedRenderState *render_state, ScopedProfilerNode &prof) { // buffer crap - vu.vi02 = vu.vi13 - 0x363; + vu.vi02 = vu.vi13 - 0x363; // 867 vu.vi13 = vu.vi13 + 0x1e; if (vu.vi02 == 0) { vu.vi13 = 0x345; /* 837 */ diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index 
3efe1a8684..be1a0f4406 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -5,7 +5,7 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), m_direct(name, my_id, 0x30000), m_direct2(30000, 60000, 1000, name, true), - m_debug_gen2(name, my_id, 50000, 1000, 1000) {} + m_debug_gen2(name, my_id, 50000, 1000, 1000, 300) {} void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); diff --git a/game/graphics/opengl_renderer/foreground/Generic2.cpp b/game/graphics/opengl_renderer/foreground/Generic2.cpp index 9cc89ed29b..10c07649c9 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2.cpp @@ -1,14 +1,19 @@ #include "Generic2.h" +#include "game/graphics/opengl_renderer/AdgifHandler.h" + Generic2::Generic2(const std::string& name, BucketId my_id, u32 num_verts, u32 num_frags, - u32 num_adgif) + u32 num_adgif, + u32 num_buckets) : BucketRenderer(name, my_id) { m_verts.resize(num_verts); m_fragments.resize(num_frags); m_adgifs.resize(num_adgif); + m_buckets.resize(num_buckets); + m_indices.resize(num_verts * 3); } void Generic2::draw_debug_window() {} @@ -37,17 +42,24 @@ void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedP // Generic2 has 3 passes. 
{ // our first pass is to go over the DMA chain from the game and extract the data into buffers - // Timer proc_dma_timer; + Timer proc_dma_timer; auto p = prof.make_scoped_child("dma"); process_dma(dma, render_state->next_bucket); - // fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); + if (m_next_free_vert > 10000) { + fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); + } } { // the next pass is to look at all of that data, and figure out the best order to draw it // using OpenGL + Timer setup_timer; auto p = prof.make_scoped_child("setup"); setup_draws(); + if (m_next_free_vert > 10000) { + fmt::print("setup: {} buckets, {} adgifs {} indices in {:.3f} ms\n", m_next_free_bucket, + m_next_free_adgif, m_next_free_idx, setup_timer.getMs()); + } } { @@ -57,7 +69,4 @@ void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedP } } - -void Generic2::setup_draws() {} - void Generic2::do_draws() {} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/foreground/Generic2.h b/game/graphics/opengl_renderer/foreground/Generic2.h index 672df4f546..b008a2966b 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.h +++ b/game/graphics/opengl_renderer/foreground/Generic2.h @@ -4,7 +4,12 @@ class Generic2 : public BucketRenderer { public: - Generic2(const std::string& name, BucketId my_id, u32 num_verts, u32 num_frags, u32 num_adgif); + Generic2(const std::string& name, + BucketId my_id, + u32 num_verts, + u32 num_frags, + u32 num_adgif, + u32 num_buckets); void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; void draw_debug_window() override; // void init_shaders(ShaderLibrary& shaders) override; @@ -15,19 +20,24 @@ class Generic2 : public BucketRenderer { math::Vector st; // 16 u8 tex_unit; u8 flags; - u8 fog; - u8 pad; - u32 pad2; + u8 adc; + u8 pad0; + u32 pad1; }; static_assert(sizeof(Vertex) == 32); private: + void 
determine_draw_modes(); + void build_index_buffer(); + void link_adgifs_back_to_frags(); + void draws_to_buckets(); void reset_buffers(); + void process_matrices(); void process_dma(DmaFollower& dma, u32 next_bucket); void setup_draws(); void do_draws(); bool check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket); - + void final_vertex_update(); bool handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket); struct GenericDraw { @@ -46,9 +56,26 @@ class Generic2 : public BucketRenderer { math::Vector4f hvdf_offset; float pfog0; // scale factor for perspective divide float fog_min, fog_max; // clamp for fog - + math::Vector3f scale; + float mat_23, mat_32; } m_drawing_config; + struct GsState { + DrawMode as_mode; + u16 tbp; + GsTest gs_test; + GsTex0 gs_tex0; + GsPrim gs_prim; + GsAlpha gs_alpha; + u8 tex_unit = 0; + + u8 vertex_flags = 0; + void set_tcc_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 1; } + void set_decal_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 2; } + void set_fog_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 4; } + + } m_gs; + static constexpr u32 FRAG_HEADER_SIZE = 16 * 7; struct Fragment { u8 header[FRAG_HEADER_SIZE]; @@ -59,14 +86,42 @@ class Generic2 : public BucketRenderer { u32 vtx_count = 0; u8 mscal_addr = 0; }; + + struct Adgif { + AdGifData data; + DrawMode mode; + u32 tbp; + u8 vtx_flags; + u32 frag; + u32 vtx_idx; + u32 vtx_count; + + u32 next = -2; + + u64 key() const { + u64 result = mode.as_int(); + result |= (((u64)tbp) << 32); + return result; + } + }; + + struct Bucket { + DrawMode mode; + u32 tbp; + u32 start = UINT32_MAX; + u32 last = UINT32_MAX; + + u32 idx_idx; + u32 idx_count; + }; + u32 handle_fragments_after_unpack_v4_32(const u8* data, u32 off, u32 first_unpack_bytes, u32 next_bucket, u32 end_of_vif, - Fragment* frag, bool loop); - - + Fragment* frag, + bool loop); u32 m_next_free_frag = 0; std::vector m_fragments; @@ -74,14 +129,15 @@ class Generic2 
: public BucketRenderer { u32 m_next_free_vert = 0; std::vector m_verts; - struct Adgif { - AdGifData data; - u32 ee_mem_addr; - }; - u32 m_next_free_adgif = 0; std::vector m_adgifs; + u32 m_next_free_bucket = 0; + std::vector m_buckets; + + u32 m_next_free_idx = 0; + std::vector m_indices; + Fragment& next_frag() { ASSERT(m_next_free_frag < m_fragments.size()); return m_fragments[m_next_free_frag++]; diff --git a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp new file mode 100644 index 0000000000..42cacd023c --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp @@ -0,0 +1,251 @@ +#include "Generic2.h" + +/*! + * Main function to set up Generic2 draw lists. + * This function figures out which vertices belong to which draw settings. + */ +void Generic2::setup_draws() { + if (m_next_free_frag == 0) { + return; + } + m_gs = GsState(); + determine_draw_modes(); + link_adgifs_back_to_frags(); + draws_to_buckets(); + process_matrices(); + final_vertex_update(); + build_index_buffer(); +} + +/*! + * For each adgif, determine the draw mode. + * There's a bunch of stuff in adgifs that don't really matter, and this filters out all that junk + * They also do a bunch of tricks where some of the GS state is left over from the previous draw. + * + * For each adgif, it determines the "draw mode" which is used as a unique identifier for OpenGL + * settings, the tbp (texture vram address), and the "vertex flags" that need to be set for each + * vertex. This information is used in later steps. 
+ */ +void Generic2::determine_draw_modes() { + // initialize draw mode + DrawMode current_mode; + current_mode.set_at(true); + current_mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL); + current_mode.set_aref(0x26); + current_mode.set_alpha_fail(GsTest::AlphaFail::FB_ONLY); + current_mode.set_zt(true); + current_mode.set_depth_test(GsTest::ZTest::GEQUAL); + current_mode.set_depth_write_enable(m_drawing_config.zmsk); + + u32 tbp = -1; + + // these are copies of the state + GsTex0 tex0; + tex0.data = UINT64_MAX; + + // iterate over all adgifs + for (u32 i = 0; i < m_next_free_adgif; i++) { + auto& ad = m_adgifs[i].data; + + // ADGIF 0 + ASSERT((u8)ad.tex0_addr == (u32)GsRegisterAddress::TEX0_1); + if (ad.tex0_data != tex0.data) { + tex0.data = ad.tex0_data; + GsTex0 reg(ad.tex0_data); + tbp = reg.tbp0(); + // tbw + if (reg.psm() == GsTex0::PSM::PSMT4HH) { + tbp |= 0x8000; + } + // tw/th + current_mode.set_tcc(reg.tcc()); + m_gs.set_tcc_flag(reg.tcc()); + bool decal = reg.tfx() == GsTex0::TextureFunction::DECAL; + current_mode.set_decal(decal); + m_gs.set_decal_flag(decal); + ASSERT(reg.tfx() == GsTex0::TextureFunction::DECAL || + reg.tfx() == GsTex0::TextureFunction::MODULATE); + } + + // ADGIF 1 + ASSERT((u8)ad.tex1_addr == (u32)GsRegisterAddress::TEX1_1); + { + GsTex1 reg(ad.tex1_data); + current_mode.set_filt_enable(reg.mmag()); + } + + // ADGIF 2 + ASSERT((u8)ad.mip_addr == (u32)GsRegisterAddress::MIPTBP1_1); + + // ADGIF 3 + ASSERT((u8)ad.clamp_addr == (u32)GsRegisterAddress::CLAMP_1); + { + bool clamp_s = ad.clamp_data & 0b001; + bool clamp_t = ad.clamp_data & 0b100; + current_mode.set_clamp_s_enable(clamp_s); + current_mode.set_clamp_t_enable(clamp_t); + } + + if ((u8)ad.alpha_addr == (u32)GsRegisterAddress::ALPHA_1) { + GsAlpha reg(ad.alpha_data); + if (m_gs.gs_alpha != reg) { + m_gs.gs_alpha = reg; + auto a = reg.a_mode(); + auto b = reg.b_mode(); + auto c = reg.c_mode(); + auto d = reg.d_mode(); + if (a == GsAlpha::BlendMode::SOURCE && b == 
GsAlpha::BlendMode::DEST && + c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST); + } else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::ZERO_OR_FIXED && + c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_SRC_DST); + } else if (a == GsAlpha::BlendMode::ZERO_OR_FIXED && b == GsAlpha::BlendMode::SOURCE && + c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::ZERO_SRC_SRC_DST); + } else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::DEST && + c == GsAlpha::BlendMode::ZERO_OR_FIXED && d == GsAlpha::BlendMode::DEST) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_FIX_DST); + } else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::SOURCE && + c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::SOURCE) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_SRC_SRC_SRC); + } else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::ZERO_OR_FIXED && + c == GsAlpha::BlendMode::DEST && d == GsAlpha::BlendMode::DEST) { + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_DST_DST); + } else { + // unsupported blend: a 0 b 2 c 2 d 1 + // lg::error("unsupported blend: a {} b {} c {} d {}", (int)a, (int)b, (int)c, (int)d); + // ASSERT(false); + } + } + } else { + ASSERT((u8)ad.alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); + } + + m_adgifs[i].mode = current_mode; + m_adgifs[i].vtx_flags = m_gs.vertex_flags; + m_adgifs[i].tbp = tbp; + } +} + +/*! + * For each adgif, figure out the vertices that it belongs to, in the giant vertex buffer. 
+ */ +void Generic2::link_adgifs_back_to_frags() { + for (u32 i = 0; i < m_next_free_frag; i++) { + auto& frag = m_fragments[i]; + for (u32 j = 0; j < frag.adgif_count; j++) { + auto& ad = m_adgifs[frag.adgif_idx + j]; + ad.vtx_count = (ad.data.tex1_addr >> 32) & 0xfff; // drop the eop flag + ad.vtx_idx = frag.vtx_idx + ((ad.data.tex0_addr >> 32) & 0xffff) / 3; + ASSERT(ad.vtx_count + ad.vtx_idx <= frag.vtx_count + frag.vtx_idx); + ad.frag = i; + } + } +} + +/*! + * Build linked lists of adgifs that share the same settings. + * TODO: also determine texture units per bucket here. + */ +void Generic2::draws_to_buckets() { + std::unordered_map draw_key_to_bucket; + for (u32 i = 0; i < m_next_free_adgif; i++) { + auto& ad = m_adgifs[i]; + u64 key = ad.key(); + const auto& bucket_it = draw_key_to_bucket.find(key); + if (bucket_it == draw_key_to_bucket.end()) { + // new bucket! + u32 bucket_idx = m_next_free_bucket++; + ASSERT(bucket_idx < m_buckets.size()); + draw_key_to_bucket[key] = bucket_idx; + auto& bucket = m_buckets[bucket_idx]; + bucket.tbp = ad.tbp; + bucket.mode = ad.mode; + bucket.start = i; + bucket.last = i; + ad.next = UINT32_MAX; + } else { + // existing bucket! + auto& bucket = m_buckets[bucket_it->second]; + m_adgifs[bucket.last].next = i; + ad.next = UINT32_MAX; + bucket.last = i; + } + } +} + +/*! + * Extract the matrix. They are exactly a perspective projection and they are all the same. + * I don't think this will hold for TIE... 
+ */ +void Generic2::process_matrices() { + std::array reference_mat; + memcpy(&reference_mat, m_fragments[0].header, 64); + m_drawing_config.scale[0] = reference_mat[0][0]; + m_drawing_config.scale[1] = reference_mat[1][1]; + m_drawing_config.scale[2] = reference_mat[2][2]; + m_drawing_config.mat_23 = reference_mat[2][3]; + m_drawing_config.mat_32 = reference_mat[3][2]; + + ASSERT(reference_mat[0][1] == 0); + ASSERT(reference_mat[0][2] == 0); + ASSERT(reference_mat[0][3] == 0); + ASSERT(reference_mat[1][0] == 0); + ASSERT(reference_mat[1][2] == 0); + ASSERT(reference_mat[1][3] == 0); + ASSERT(reference_mat[2][0] == 0); + ASSERT(reference_mat[2][1] == 0); + ASSERT(reference_mat[3][0] == 0); + ASSERT(reference_mat[3][1] == 0); + ASSERT(reference_mat[3][3] == 0); + + for (u32 i = 0; i < m_next_free_frag; i++) { + std::array mat; + memcpy(&mat, m_fragments[i].header, 64); + ASSERT(mat == reference_mat); + } +} + +/*! + * After all bucketing/draw modes have been determined, fill out the flag fields of all vertices. + * TODO: fill out texture units + */ +void Generic2::final_vertex_update() { + for (u32 i = 0; i < m_next_free_adgif; i++) { + auto& ad = m_adgifs[i]; + for (u32 j = 0; j < ad.vtx_count; j++) { + m_verts[ad.vtx_idx + j].flags = ad.vtx_flags; + } + } +} + +/*! + * Build the index buffer. + * TODO: this de-strips the strips... 
+ */ +void Generic2::build_index_buffer() { + for (u32 bucket_idx = 0; bucket_idx < m_next_free_bucket; bucket_idx++) { + auto& bucket = m_buckets[bucket_idx]; + bucket.idx_idx = m_next_free_idx; + + u32 adgif_idx = bucket.start; + while (adgif_idx != UINT32_MAX) { + auto& adgif = m_adgifs[adgif_idx]; + + u32 warmup = 0; + for (u32 vidx = adgif.vtx_idx; vidx < adgif.vtx_idx + adgif.vtx_count; vidx++) { + auto& vtx = m_verts[vidx]; + warmup++; + if (!vtx.adc && warmup >= 3) { + m_indices[m_next_free_idx++] = vidx; + m_indices[m_next_free_idx++] = vidx - 1; + m_indices[m_next_free_idx++] = vidx - 2; + } + } + adgif_idx = adgif.next; + } + + bucket.idx_count = m_next_free_idx - bucket.idx_idx; + } +} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp index be45d644c5..9020944a5e 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp @@ -1,4 +1,5 @@ #include "Generic2.h" +#include "game/graphics/opengl_renderer/AdgifHandler.h" /*! * Advance through DMA data that has no effect on rendering (NOP codes) and see if this is the @@ -102,6 +103,8 @@ void Generic2::reset_buffers() { m_next_free_frag = 0; m_next_free_vert = 0; m_next_free_adgif = 0; + m_next_free_bucket = 0; + m_next_free_idx = 0; } bool is_nop_vif(const u8* data) { @@ -136,11 +139,14 @@ u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) { s16 s, t; memcpy(&s, data + (i * 4), 2); memcpy(&t, data + (i * 4) + 2, 2); + s16 s_masked = s & (s16)0xfffe; // note: int to float happening here. // if this is a bottleneck, we can possible keep integers and do this in the shader. 
- // I've avoided this for now because only some integer formats are efficient on the GPU - vtx[i].st[0] = s; + // I've avoided this for now because only some integer formats are inefficient on the GPU + // and it's hard to know what's supported well on all drivers/GPUs + vtx[i].st[0] = s_masked; vtx[i].st[1] = t; + vtx[i].adc = s_masked == s; } return vtx_count * 4; } @@ -152,10 +158,12 @@ u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data, u32 end_of_vif, Fragment* frag, bool loop) { + // note: they rely on _something_ aligning this? + u32 off_aligned = (off + 15) & ~15; // each header should have 7 qw header + at least 5 qw for a single adgif. ASSERT(first_unpack_bytes >= FRAG_HEADER_SIZE + sizeof(AdGifData)); // grab the 7 qw header - memcpy(frag->header, data + off, FRAG_HEADER_SIZE); + memcpy(frag->header, data + off_aligned, FRAG_HEADER_SIZE); // figure out how many adgifs and grab those. u32 adgif_bytes = (first_unpack_bytes - FRAG_HEADER_SIZE); @@ -165,7 +173,8 @@ u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data, ASSERT(frag->adgif_count > 0); ASSERT(adgif_bytes == adgifs * sizeof(AdGifData)); for (u32 i = 0; i < adgifs; i++) { - memcpy(&next_adgif().data, data + off + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)), + auto& add = next_adgif(); + memcpy(&add.data, data + off_aligned + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)), sizeof(AdGifData)); } From c775dedb7e6b0118a28957a6f2c1e127267755c3 Mon Sep 17 00:00:00 2001 From: water Date: Sat, 5 Mar 2022 22:44:48 -0500 Subject: [PATCH 06/12] first pass at opengl --- game/CMakeLists.txt | 1 + .../graphics/opengl_renderer/BucketRenderer.h | 1 + .../opengl_renderer/GenericRenderer.cpp | 21 +- .../opengl_renderer/OpenGLRenderer.cpp | 1 + game/graphics/opengl_renderer/Shader.cpp | 1 + game/graphics/opengl_renderer/Shader.h | 1 + .../opengl_renderer/foreground/Generic2.cpp | 21 +- .../opengl_renderer/foreground/Generic2.h | 31 +- .../foreground/Generic2_Build.cpp | 45 +-- 
.../foreground/Generic2_OpenGL.cpp | 276 ++++++++++++++++++ .../opengl_renderer/shaders/generic.frag | 78 +++++ .../opengl_renderer/shaders/generic.vert | 88 ++++++ 12 files changed, 522 insertions(+), 43 deletions(-) create mode 100644 game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp create mode 100644 game/graphics/opengl_renderer/shaders/generic.frag create mode 100644 game/graphics/opengl_renderer/shaders/generic.vert diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index ac8d769594..35b2be9d42 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -81,6 +81,7 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/foreground/Generic2.cpp graphics/opengl_renderer/foreground/Generic2_DMA.cpp graphics/opengl_renderer/foreground/Generic2_Build.cpp + graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp graphics/opengl_renderer/BucketRenderer.cpp graphics/opengl_renderer/debug_gui.cpp graphics/opengl_renderer/DirectRenderer.cpp diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index bc991d7268..581cbd34cc 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -92,6 +92,7 @@ struct SharedRenderState { bool enable_merc_xgkick = true; bool enable_generic_xgkick = true; bool use_direct2 = true; + bool use_generic2 = true; math::Vector fog_color; float fog_intensity = 1.f; diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index be1a0f4406..b2af245031 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -9,11 +9,16 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); + m_debug_gen2.init_shaders(shaders); } void GenericRenderer::render(DmaFollower& dma, SharedRenderState* render_state, 
ScopedProfilerNode& prof) { + if (render_state->use_generic2) { + m_debug_gen2.render(dma, render_state, prof); + return; + } m_xgkick_idx = 0; m_skipped_tags = 0; m_debug.clear(); @@ -146,12 +151,6 @@ void GenericRenderer::render(DmaFollower& dma, } else { m_direct.flush_pending(render_state, prof); } - - { - // todo remove - auto pp = prof.make_scoped_child("gen2"); - m_debug_gen2.render(gen2_follower, render_state, pp); - } } void GenericRenderer::handle_dma_stream(const u8* data, @@ -325,10 +324,12 @@ void GenericRenderer::mscal(int imm, SharedRenderState* render_state, ScopedProf void GenericRenderer::xgkick(u16 addr, SharedRenderState* render_state, ScopedProfilerNode& prof) { if (render_state->enable_generic_xgkick && m_xgkick_idx >= m_min_xgkick && m_xgkick_idx < m_max_xgkick) { - if (render_state->use_direct2) { - m_direct2.render_gif_data(m_buffer.data + (16 * addr), render_state, prof); - } else { - m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof); + if (!render_state->use_generic2) { + if (render_state->use_direct2) { + m_direct2.render_gif_data(m_buffer.data + (16 * addr), render_state, prof); + } else { + m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof); + } } } m_xgkick_idx++; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index b9fa9a82c3..5cf07202e3 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -300,6 +300,7 @@ void OpenGLRenderer::draw_renderer_selection_window() { ImGui::Checkbox("Merc XGKICK", &m_render_state.enable_merc_xgkick); ImGui::Checkbox("Generic XGKICK", &m_render_state.enable_generic_xgkick); ImGui::Checkbox("Direct 2", &m_render_state.use_direct2); + ImGui::Checkbox("Generic 2", &m_render_state.use_generic2); for (size_t i = 0; i < m_bucket_renderers.size(); i++) { auto renderer = m_bucket_renderers[i].get(); diff --git 
a/game/graphics/opengl_renderer/Shader.cpp b/game/graphics/opengl_renderer/Shader.cpp index 0d69501529..5c4d21bfe5 100644 --- a/game/graphics/opengl_renderer/Shader.cpp +++ b/game/graphics/opengl_renderer/Shader.cpp @@ -77,4 +77,5 @@ ShaderLibrary::ShaderLibrary() { at(ShaderId::SPRITE3) = {"sprite3_3d"}; at(ShaderId::DIRECT2) = {"direct2"}; at(ShaderId::EYE) = {"eye"}; + at(ShaderId::GENERIC) = {"generic"}; } diff --git a/game/graphics/opengl_renderer/Shader.h b/game/graphics/opengl_renderer/Shader.h index 7e314cdb5b..1a0f01740f 100644 --- a/game/graphics/opengl_renderer/Shader.h +++ b/game/graphics/opengl_renderer/Shader.h @@ -34,6 +34,7 @@ enum class ShaderId { SPRITE3 = 9, DIRECT2 = 10, EYE = 11, + GENERIC = 12, MAX_SHADERS }; diff --git a/game/graphics/opengl_renderer/foreground/Generic2.cpp b/game/graphics/opengl_renderer/foreground/Generic2.cpp index 10c07649c9..cab0bf0cae 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2.cpp @@ -14,6 +14,12 @@ Generic2::Generic2(const std::string& name, m_adgifs.resize(num_adgif); m_buckets.resize(num_buckets); m_indices.resize(num_verts * 3); + + opengl_setup(); +} + +Generic2::~Generic2() { + opengl_cleanup(); } void Generic2::draw_debug_window() {} @@ -46,7 +52,7 @@ void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedP auto p = prof.make_scoped_child("dma"); process_dma(dma, render_state->next_bucket); if (m_next_free_vert > 10000) { - fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); + // fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); } } @@ -57,16 +63,19 @@ void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedP auto p = prof.make_scoped_child("setup"); setup_draws(); if (m_next_free_vert > 10000) { - fmt::print("setup: {} buckets, {} adgifs {} indices in {:.3f} ms\n", m_next_free_bucket, - m_next_free_adgif, m_next_free_idx, 
setup_timer.getMs()); +// fmt::print("setup: {} buckets, {} adgifs {} indices in {:.3f} ms\n", m_next_free_bucket, +// m_next_free_adgif, m_next_free_idx, setup_timer.getMs()); } } { // the final pass is the actual drawing. + Timer draw_timer; auto p = prof.make_scoped_child("drawing"); - do_draws(); + do_draws(render_state, prof); + if (m_next_free_vert > 10000) { + // fmt::print("draw {:.3f} ms\n", draw_timer.getMs()); + } + } } - -void Generic2::do_draws() {} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/foreground/Generic2.h b/game/graphics/opengl_renderer/foreground/Generic2.h index b008a2966b..b81008bbae 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.h +++ b/game/graphics/opengl_renderer/foreground/Generic2.h @@ -10,8 +10,10 @@ class Generic2 : public BucketRenderer { u32 num_frags, u32 num_adgif, u32 num_buckets); + ~Generic2(); void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; void draw_debug_window() override; + void init_shaders(ShaderLibrary& shaders) override; // void init_shaders(ShaderLibrary& shaders) override; struct Vertex { @@ -35,16 +37,24 @@ class Generic2 : public BucketRenderer { void process_matrices(); void process_dma(DmaFollower& dma, u32 next_bucket); void setup_draws(); - void do_draws(); + void do_draws(SharedRenderState* render_state, ScopedProfilerNode& prof); bool check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket); void final_vertex_update(); bool handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket); - struct GenericDraw { - u32 first_vert = -1; - u32 verts = -1; - u16 mscal = 0; - }; + void opengl_setup(); + void opengl_cleanup(); + void opengl_bind(SharedRenderState* render_state); + void setup_opengl_for_draw_mode(const DrawMode& draw_mode, + u8 fix, + SharedRenderState* render_state); + + void setup_opengl_tex(u16 unit, + u16 tbp, + bool filter, + bool clamp_s, + bool clamp_t, + SharedRenderState* render_state); struct { 
u32 stcycl; @@ -91,6 +101,7 @@ class Generic2 : public BucketRenderer { AdGifData data; DrawMode mode; u32 tbp; + u32 fix; u8 vtx_flags; u32 frag; u32 vtx_idx; @@ -101,6 +112,7 @@ class Generic2 : public BucketRenderer { u64 key() const { u64 result = mode.as_int(); result |= (((u64)tbp) << 32); + result |= (((u64)fix) << 48); return result; } }; @@ -158,4 +170,11 @@ class Generic2 : public BucketRenderer { struct Stats { u32 dma_tags = 0; } m_stats; + + struct { + GLuint vao; + GLuint vertex_buffer; + GLuint index_buffer; + GLuint alpha_reject, color_mult, fog_color, scale, mat_23, mat_32, fog_consts, hvdf_offset; + } m_ogl; }; diff --git a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp index 42cacd023c..f56e2578a6 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp @@ -35,7 +35,9 @@ void Generic2::determine_draw_modes() { current_mode.set_alpha_fail(GsTest::AlphaFail::FB_ONLY); current_mode.set_zt(true); current_mode.set_depth_test(GsTest::ZTest::GEQUAL); - current_mode.set_depth_write_enable(m_drawing_config.zmsk); + current_mode.set_depth_write_enable(!m_drawing_config.zmsk); + current_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_SRC_SRC_SRC); + m_gs.set_fog_flag(true); u32 tbp = -1; @@ -88,7 +90,7 @@ void Generic2::determine_draw_modes() { if ((u8)ad.alpha_addr == (u32)GsRegisterAddress::ALPHA_1) { GsAlpha reg(ad.alpha_data); - if (m_gs.gs_alpha != reg) { + //if (m_gs.gs_alpha != reg) { m_gs.gs_alpha = reg; auto a = reg.a_mode(); auto b = reg.b_mode(); @@ -117,7 +119,7 @@ void Generic2::determine_draw_modes() { // lg::error("unsupported blend: a {} b {} c {} d {}", (int)a, (int)b, (int)c, (int)d); // ASSERT(false); } - } + // } } else { ASSERT((u8)ad.alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); } @@ -125,6 +127,7 @@ void Generic2::determine_draw_modes() { m_adgifs[i].mode = current_mode; 
m_adgifs[i].vtx_flags = m_gs.vertex_flags; m_adgifs[i].tbp = tbp; + m_adgifs[i].fix = m_gs.gs_alpha.fix(); } } @@ -188,23 +191,23 @@ void Generic2::process_matrices() { m_drawing_config.mat_23 = reference_mat[2][3]; m_drawing_config.mat_32 = reference_mat[3][2]; - ASSERT(reference_mat[0][1] == 0); - ASSERT(reference_mat[0][2] == 0); - ASSERT(reference_mat[0][3] == 0); - ASSERT(reference_mat[1][0] == 0); - ASSERT(reference_mat[1][2] == 0); - ASSERT(reference_mat[1][3] == 0); - ASSERT(reference_mat[2][0] == 0); - ASSERT(reference_mat[2][1] == 0); - ASSERT(reference_mat[3][0] == 0); - ASSERT(reference_mat[3][1] == 0); - ASSERT(reference_mat[3][3] == 0); - - for (u32 i = 0; i < m_next_free_frag; i++) { - std::array mat; - memcpy(&mat, m_fragments[i].header, 64); - ASSERT(mat == reference_mat); - } +// ASSERT(reference_mat[0][1] == 0); +// ASSERT(reference_mat[0][2] == 0); +// ASSERT(reference_mat[0][3] == 0); +// ASSERT(reference_mat[1][0] == 0); +// ASSERT(reference_mat[1][2] == 0); +// ASSERT(reference_mat[1][3] == 0); +// ASSERT(reference_mat[2][0] == 0); +// ASSERT(reference_mat[2][1] == 0); +// ASSERT(reference_mat[3][0] == 0); +// ASSERT(reference_mat[3][1] == 0); +// ASSERT(reference_mat[3][3] == 0); +// +// for (u32 i = 0; i < m_next_free_frag; i++) { +// std::array mat; +// memcpy(&mat, m_fragments[i].header, 64); +// ASSERT(mat == reference_mat); +// } } /*! 
@@ -237,7 +240,7 @@ void Generic2::build_index_buffer() { for (u32 vidx = adgif.vtx_idx; vidx < adgif.vtx_idx + adgif.vtx_count; vidx++) { auto& vtx = m_verts[vidx]; warmup++; - if (!vtx.adc && warmup >= 3) { + if (vtx.adc && warmup >= 3) { m_indices[m_next_free_idx++] = vidx; m_indices[m_next_free_idx++] = vidx - 1; m_indices[m_next_free_idx++] = vidx - 2; diff --git a/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp new file mode 100644 index 0000000000..1c9334b113 --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp @@ -0,0 +1,276 @@ +#include "Generic2.h" + +void Generic2::opengl_setup() { + // create OpenGL objects + glGenBuffers(1, &m_ogl.vertex_buffer); + glGenBuffers(1, &m_ogl.index_buffer); + glGenVertexArrays(1, &m_ogl.vao); + + // set up the vertex array + glBindVertexArray(m_ogl.vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_ogl.index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_indices.size() * sizeof(u32), nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, m_verts.size() * sizeof(Vertex), nullptr, GL_STREAM_DRAW); + + // xyz + glEnableVertexAttribArray(0); + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 floats per vert + GL_FLOAT, // floats + GL_TRUE, // normalized, ignored, + sizeof(Vertex), // + (void*)offsetof(Vertex, xyz) // offset in array + ); + + // rgba + glEnableVertexAttribArray(1); + glVertexAttribPointer(1, // location 1 in the shader + 4, // 4 color components + GL_UNSIGNED_BYTE, // u8 + GL_TRUE, // normalized (255 becomes 1) + sizeof(Vertex), // + (void*)offsetof(Vertex, rgba) // + ); + + // stq + glEnableVertexAttribArray(2); + glVertexAttribPointer(2, // location 2 in the shader + 2, // 2 floats per vert + GL_FLOAT, // floats + GL_FALSE, // normalized, ignored + sizeof(Vertex), // + (void*)offsetof(Vertex, st) // offset in array + ); + + // byte data 
+ glEnableVertexAttribArray(3); + glVertexAttribIPointer(3, // location 3 in the shader + 4, // + GL_UNSIGNED_BYTE, // u8's + sizeof(Vertex), // + (void*)offsetof(Vertex, tex_unit) // offset in array + ); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); +} + +void Generic2::opengl_cleanup() { + glDeleteBuffers(1, &m_ogl.vertex_buffer); + glDeleteBuffers(1, &m_ogl.index_buffer); + glDeleteVertexArrays(1, &m_ogl.vao); +} + +void Generic2::init_shaders(ShaderLibrary& shaders) { + shaders[ShaderId::GENERIC].activate(); + m_ogl.alpha_reject = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "alpha_reject"); + m_ogl.color_mult = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "color_mult"); + m_ogl.fog_color = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "fog_color"); + + m_ogl.scale = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "scale"); + m_ogl.mat_23 = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "mat_23"); + m_ogl.mat_32 = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "mat_32"); + m_ogl.fog_consts = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "fog_constants"); + m_ogl.hvdf_offset = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "hvdf_offset"); +} + +void Generic2::opengl_bind(SharedRenderState* render_state) { + render_state->shaders[ShaderId::GENERIC].activate(); + glUniform4f(m_ogl.fog_color, render_state->fog_color[0], render_state->fog_color[1], + render_state->fog_color[2], render_state->fog_intensity); + glUniform4f(m_ogl.scale, m_drawing_config.scale[0], m_drawing_config.scale[1], + m_drawing_config.scale[2], 0); + glUniform1f(m_ogl.mat_23, m_drawing_config.mat_23); + glUniform1f(m_ogl.mat_32, m_drawing_config.mat_32); + glUniform3f(m_ogl.fog_consts, m_drawing_config.pfog0, m_drawing_config.fog_min, + m_drawing_config.fog_max); + glUniform4f(m_ogl.hvdf_offset, m_drawing_config.hvdf_offset[0], m_drawing_config.hvdf_offset[1], + m_drawing_config.hvdf_offset[2], 
m_drawing_config.hvdf_offset[3]); +} + +void Generic2::setup_opengl_for_draw_mode(const DrawMode& draw_mode, + u8 fix, + SharedRenderState* render_state) { + // compute alpha_reject: + float alpha_reject = 0.f; + if (draw_mode.get_at_enable()) { + switch (draw_mode.get_alpha_test()) { + case DrawMode::AlphaTest::ALWAYS: + break; + case DrawMode::AlphaTest::GEQUAL: + alpha_reject = draw_mode.get_aref() / 128.f; + break; + case DrawMode::AlphaTest::NEVER: + break; + default: + fmt::print("unknown alpha test: {}\n", (int)draw_mode.get_alpha_test()); + ASSERT(false); + } + } + + // setup blending and color mult + float color_mult = 1.f; + if (!draw_mode.get_ab_enable()) { + glDisable(GL_BLEND); + } else { + glEnable(GL_BLEND); + glBlendColor(1, 1, 1, 1); + if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_DST_SRC_DST) { + // (Cs - Cd) * As + Cd + // Cs * As + (1 - As) * Cd + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glBlendEquation(GL_FUNC_ADD); + } else if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_0_SRC_DST) { + // (Cs - 0) * As + Cd + // Cs * As + (1) * Cd + // s, d + ASSERT(fix == 0); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + glBlendEquation(GL_FUNC_ADD); + } else if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::ZERO_SRC_SRC_DST) { + // (0 - Cs) * As + Cd + // Cd - Cs * As + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + } else if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_DST_FIX_DST) { + // (Cs - Cd) * fix + Cd + // Cs * fix + (1 - fx) * Cd + glBlendFunc(GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA); + glBlendColor(0, 0, 0, fix / 127.f); + glBlendEquation(GL_FUNC_ADD); + } else if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_SRC_SRC_SRC) { + // this is very weird... 
+ // Cs + glBlendFunc(GL_ONE, GL_ZERO); + glBlendEquation(GL_FUNC_ADD); + } else if (draw_mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_0_DST_DST) { + // (Cs - 0) * Ad + Cd + glBlendFunc(GL_DST_ALPHA, GL_ONE); + glBlendEquation(GL_FUNC_ADD); + color_mult = 0.5; + } else { + ASSERT(false); + } + } + + // setup ztest + if (draw_mode.get_zt_enable()) { + glEnable(GL_DEPTH_TEST); + switch (draw_mode.get_depth_test()) { + case GsTest::ZTest::NEVER: + glDepthFunc(GL_NEVER); + break; + case GsTest::ZTest::ALWAYS: + glDepthFunc(GL_ALWAYS); + break; + case GsTest::ZTest::GEQUAL: + glDepthFunc(GL_GEQUAL); + break; + case GsTest::ZTest::GREATER: + glDepthFunc(GL_GREATER); + break; + default: + ASSERT(false); + } + } else { + // you aren't supposed to turn off z test enable, the GS had some bugs + ASSERT(false); + } + + if (draw_mode.get_depth_write_enable()) { + glDepthMask(GL_TRUE); + } else { + glDepthMask(GL_FALSE); + } + + glUniform1f(m_ogl.alpha_reject, alpha_reject); + glUniform1f(m_ogl.color_mult, color_mult); + glUniform4f(m_ogl.fog_color, render_state->fog_color[0], render_state->fog_color[1], + render_state->fog_color[2], render_state->fog_intensity); +} + +void Generic2::setup_opengl_tex(u16 unit, + u16 tbp, + bool filter, + bool clamp_s, + bool clamp_t, + SharedRenderState* render_state) { + // look up the texture + std::optional tex; + u32 tbp_to_lookup = tbp & 0x7fff; + bool use_mt4hh = tbp & 0x8000; + + if (use_mt4hh) { + tex = render_state->texture_pool->lookup_mt4hh(tbp_to_lookup); + } else { + tex = render_state->texture_pool->lookup(tbp_to_lookup); + } + + if (!tex) { + // TODO Add back + if (tbp_to_lookup >= 8160 && tbp_to_lookup <= 8600) { + fmt::print("Failed to find texture at {}, using random (eye zone)\n", tbp_to_lookup); + + tex = render_state->texture_pool->get_placeholder_texture(); + } else { + fmt::print("Failed to find texture at {}, using random\n", tbp_to_lookup); + tex = render_state->texture_pool->get_placeholder_texture(); + } + } + + 
glActiveTexture(GL_TEXTURE0 + unit); + glBindTexture(GL_TEXTURE_2D, *tex); + if (clamp_s) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + } + + if (clamp_t) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + } + + if (filter) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + true ? GL_LINEAR : GL_LINEAR_MIPMAP_LINEAR); // todo + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } +} + +void Generic2::do_draws(SharedRenderState* render_state, ScopedProfilerNode& prof) { + glBindVertexArray(m_ogl.vao); + glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_ogl.index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_next_free_idx * sizeof(u32), m_indices.data(), + GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, m_next_free_vert * sizeof(Vertex), m_verts.data(), GL_STREAM_DRAW); + + // hack + // glDisable(GL_DEPTH_TEST); + // glDisable(GL_BLEND); + + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(UINT32_MAX); + + opengl_bind(render_state); + + for (u32 i = 0; i < m_next_free_bucket; i++) { + auto& bucket = m_buckets[i]; + auto& first = m_adgifs[bucket.start]; + setup_opengl_for_draw_mode(first.mode, first.fix, render_state); + setup_opengl_tex(0, first.tbp, first.mode.get_filt_enable(), first.mode.get_clamp_s_enable(), + first.mode.get_clamp_t_enable(), render_state); + glDrawElements(GL_TRIANGLES, bucket.idx_count, GL_UNSIGNED_INT, + (void*)(sizeof(u32) * bucket.idx_idx)); + prof.add_draw_call(); + prof.add_tri(bucket.idx_count / 3); + } +} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/shaders/generic.frag 
b/game/graphics/opengl_renderer/shaders/generic.frag new file mode 100644 index 0000000000..d150aeabb4 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/generic.frag @@ -0,0 +1,78 @@ +#version 430 core + + +out vec4 color; +in vec2 tex_coord; + +uniform float alpha_reject; +uniform float color_mult; +uniform vec4 fog_color; +in float fog; +in vec4 fragment_color; + +in flat uvec2 tex_info; + +layout (binding = 0) uniform sampler2D tex_T0; +layout (binding = 1) uniform sampler2D tex_T1; +layout (binding = 2) uniform sampler2D tex_T2; +layout (binding = 3) uniform sampler2D tex_T3; +layout (binding = 4) uniform sampler2D tex_T4; +layout (binding = 5) uniform sampler2D tex_T5; +layout (binding = 6) uniform sampler2D tex_T6; +layout (binding = 7) uniform sampler2D tex_T7; +layout (binding = 8) uniform sampler2D tex_T8; +layout (binding = 9) uniform sampler2D tex_T9; + +vec4 sample_tex(vec2 coord, uint unit) { + return texture(tex_T0, coord); + +// switch (unit) { +// case 0: return texture(tex_T0, coord); +// case 1: return texture(tex_T1, coord); +// case 2: return texture(tex_T2, coord); +// case 3: return texture(tex_T3, coord); +// case 4: return texture(tex_T4, coord); +// case 5: return texture(tex_T5, coord); +// case 6: return texture(tex_T6, coord); +// case 7: return texture(tex_T7, coord); +// case 8: return texture(tex_T8, coord); +// case 9: return texture(tex_T9, coord); +// default : return vec4(1.0, 0, 1.0, 1.0); +// } +} + +void main() { + vec4 T0 = sample_tex(tex_coord.xy, tex_info.x); + // y is tcc + // z is decal + + if ((tex_info.y & 1u) == 0) { + if ((tex_info.y & 2u) == 0) { + // modulate + no tcc + color.xyz = fragment_color.xyz * T0.xyz; + color.w = fragment_color.w; + } else { + // decal + no tcc + color.xyz = T0.xyz * 0.5; + color.w = fragment_color.w; + } + } else { + if ((tex_info.y & 2u) == 0) { + // modulate + tcc + color = fragment_color * T0; + } else { + // decal + tcc + color.xyz = T0.xyz * 0.5; + color.w = T0.w; + } + } + 
color *= 2; + color.xyz *= color_mult; + + if (color.a < alpha_reject) { + discard; + } + if ((tex_info.y & 4u) != 0) { + color.xyz = mix(color.xyz, fog_color.xyz / 255., clamp(fog_color.w * (1 - fog), 0, 1)); + } +} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/shaders/generic.vert b/game/graphics/opengl_renderer/shaders/generic.vert new file mode 100644 index 0000000000..f6bcd62f22 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/generic.vert @@ -0,0 +1,88 @@ +#version 430 core + +layout (location = 0) in vec3 position_in; +layout (location = 1) in vec4 rgba_in; +layout (location = 2) in vec2 tex_coord_in; +layout (location = 3) in uvec4 byte_info; + +uniform float mat_32; +uniform vec3 fog_constants; +uniform vec4 scale; +uniform float mat_23; +uniform vec4 hvdf_offset; + +out vec2 tex_coord; + +out vec4 fragment_color; +out float fog; + +out flat uvec2 tex_info; + +void main() { + // lq.xy vf22, 0(vi10) texture load? + // lq_buffer(Mask::xy, vu.vf22, vu.vi10); + + // lq.xyz vf16, 2(vi10) vertex load + // lq_buffer(Mask::xyz, gen.vtx_load0, vu.vi10 + 2); + + // mtir vi02, vf22.x grab s coordinate of texture + // vu.vi02 = vu.vf22.x_as_u16(); + + // mulaw.xyzw ACC, vf11, vf00 matrix multiply W + // vu.acc.mula(Mask::xyzw, gen.mat3, vu.vf00.w()); + vec4 transformed; + + // maddax.xyzw ACC, vf08, vf16 matrix multiply X + // vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); + transformed.xyz = position_in * scale.xyz; + transformed.z += mat_32; + transformed.w = mat_23 * position_in.z; + + transformed *= -1; // todo? 
+ + + // div Q, vf01.x, vf12.w perspective divide + // vu.Q = gen.fog.x() / gen.vtx_p0.w(); + float Q = fog_constants.x / transformed.w; + + float fog1 = -transformed.w + hvdf_offset.w; + float fog2 = min(fog1, fog_constants.z); + float fog3 = max(fog2, fog_constants.y); + fog = 255; // 255 - fog3; TODO + + // itof12.xyz vf18, vf22 texture int to float + // vu.vf18.itof12(Mask::xyz, vu.vf22); + tex_coord = tex_coord_in / 4096.f; // TODO, more and wrong. + + // mul.xyz vf12, vf12, Q persepective divide + // gen.vtx_p0.mul(Mask::xyz, gen.vtx_p0, vu.Q); + transformed.xyz *= Q; + + // mul.xyz vf18, vf18, Q texture perspective divide + // vu.vf18.mul(Mask::xyz, vu.vf18, vu.Q); + + // add.xyzw vf12, vf12, vf04 apply hvdf + // gen.vtx_p0.add(Mask::xyzw, gen.vtx_p0, gen.hvdf_off); + transformed.xyz += hvdf_offset.xyz; + + // correct xy offset + transformed.xy -= (2048.); + + // correct z scale + transformed.z /= (8388608); + transformed.z -= 1; + + // correct xy scale + transformed.x /= (256); + transformed.y /= -(128); + + // hack + transformed.xyz *= transformed.w; + + gl_Position = transformed; + // scissoring area adjust + gl_Position.y *= 512.0/448.0; + + fragment_color = rgba_in; + tex_info = byte_info.xy; +} \ No newline at end of file From dd0c57ce40e5ce42aef74e3da7bede0863d49348 Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 13:15:00 -0500 Subject: [PATCH 07/12] many fixes --- common/math/Vector.h | 9 ++ .../opengl_renderer/DirectRenderer2.cpp | 8 +- .../opengl_renderer/GenericProgram.cpp | 7 +- .../opengl_renderer/GenericRenderer.cpp | 7 +- .../opengl_renderer/foreground/Generic2.cpp | 10 +- .../opengl_renderer/foreground/Generic2.h | 5 +- .../foreground/Generic2_Build.cpp | 93 ++++++++++++++----- .../foreground/Generic2_DMA.cpp | 6 +- .../foreground/Generic2_OpenGL.cpp | 43 ++++++--- .../opengl_renderer/shaders/generic.frag | 2 +- .../opengl_renderer/shaders/generic.vert | 4 +- 11 files changed, 139 insertions(+), 55 deletions(-) diff --git 
a/common/math/Vector.h b/common/math/Vector.h index 97c74c01a2..1994c294ca 100644 --- a/common/math/Vector.h +++ b/common/math/Vector.h @@ -182,6 +182,15 @@ class Vector { return result + "]"; } + std::string to_string_hex_byte() const { + std::string result = "["; + for (auto x : m_data) { + result.append(fmt::format("0x{:02x} ", x)); + } + result.pop_back(); + return result + "]"; + } + T* data() { return m_data; } const T* data() const { return m_data; } diff --git a/game/graphics/opengl_renderer/DirectRenderer2.cpp b/game/graphics/opengl_renderer/DirectRenderer2.cpp index 8ae2c61000..6be8c3cabc 100644 --- a/game/graphics/opengl_renderer/DirectRenderer2.cpp +++ b/game/graphics/opengl_renderer/DirectRenderer2.cpp @@ -6,7 +6,8 @@ DirectRenderer2::DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, - const std::string& name, bool use_ftoi_mod) + const std::string& name, + bool use_ftoi_mod) : m_name(name), m_use_ftoi_mod(use_ftoi_mod) { // allocate buffers m_vertices.vertices.resize(max_verts); @@ -736,7 +737,6 @@ void DirectRenderer2::handle_xyzf2_mod_packed(const u8* data, u64 upper; memcpy(&upper, data + 8, 8); - u32 zi = (upper >> 4) & 0xffffff; float z; memcpy(&z, &upper, 4); @@ -778,9 +778,9 @@ void DirectRenderer2::handle_xyzf2_mod_packed(const u8* data, } // todo move to shader or something. - vert.xyz[0] = x * 16.f ; + vert.xyz[0] = x * 16.f; vert.xyz[1] = y * 16.f; - vert.xyz[2] = z ; + vert.xyz[2] = z; vert.rgba = m_state.rgba; vert.stq = math::Vector(m_state.s, m_state.t, m_state.Q); vert.tex_unit = m_state.tex_unit; diff --git a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index fa7d2a0ed1..54a987ec82 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -242,9 +242,10 @@ void GenericRenderer::mscal_noclip_nopipe(SharedRenderState *render_state, Scope // store! 
sq_buffer(Mask::xyzw, gen.vtx_p0, vu.vi10 + 2); - vu.vf18.x() /= vu.vf18.z(); - vu.vf18.y() /= vu.vf18.z(); - vu.vf18.z() = 1.f; + // this divide should happen in the vertex shader to get perspective correct textures. + // vu.vf18.x() /= vu.vf18.z(); + // vu.vf18.y() /= vu.vf18.z(); + // vu.vf18.z() = 1.f; // fmt::print("tex.z = {}\n", vu.vf18.z()); sq_buffer(Mask::xyzw, vu.vf18, vu.vi10); diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index b2af245031..7c0353849e 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -5,7 +5,7 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), m_direct(name, my_id, 0x30000), m_direct2(30000, 60000, 1000, name, true), - m_debug_gen2(name, my_id, 50000, 1000, 1000, 300) {} + m_debug_gen2(name, my_id, 50000, 1000, 1000, 800) {} void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); @@ -202,10 +202,15 @@ void GenericRenderer::handle_dma_stream(const u8* data, } void GenericRenderer::draw_debug_window() { + ImGui::Text("Skipped %d tags", m_skipped_tags); ImGui::InputInt("kick min", &m_min_xgkick); ImGui::InputInt("kick max", &m_max_xgkick); ImGui::Text("Debug:\n%s\n", m_debug.c_str()); + if (ImGui::TreeNode("Gen2")) { + m_debug_gen2.draw_debug_window(); + ImGui::TreePop(); + } if (ImGui::TreeNode("Direct")) { m_direct.draw_debug_window(); ImGui::TreePop(); diff --git a/game/graphics/opengl_renderer/foreground/Generic2.cpp b/game/graphics/opengl_renderer/foreground/Generic2.cpp index cab0bf0cae..3af700372e 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2.cpp @@ -1,6 +1,7 @@ #include "Generic2.h" #include "game/graphics/opengl_renderer/AdgifHandler.h" +#include "third-party/imgui/imgui.h" Generic2::Generic2(const std::string& name, 
BucketId my_id, @@ -22,7 +23,14 @@ Generic2::~Generic2() { opengl_cleanup(); } -void Generic2::draw_debug_window() {} +void Generic2::draw_debug_window() { + ImGui::Checkbox("Alpha 1", &m_alpha_draw_enable[0]); + ImGui::Checkbox("Alpha 2", &m_alpha_draw_enable[1]); + ImGui::Checkbox("Alpha 3", &m_alpha_draw_enable[2]); + ImGui::Checkbox("Alpha 4", &m_alpha_draw_enable[3]); + ImGui::Checkbox("Alpha 5", &m_alpha_draw_enable[4]); + ImGui::Checkbox("Alpha 6", &m_alpha_draw_enable[5]); +} /*! * Main render function for Generic2. This will be passed a DMA "follower" from the main diff --git a/game/graphics/opengl_renderer/foreground/Generic2.h b/game/graphics/opengl_renderer/foreground/Generic2.h index b81008bbae..bbbba451ca 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.h +++ b/game/graphics/opengl_renderer/foreground/Generic2.h @@ -38,6 +38,7 @@ class Generic2 : public BucketRenderer { void process_dma(DmaFollower& dma, u32 next_bucket); void setup_draws(); void do_draws(SharedRenderState* render_state, ScopedProfilerNode& prof); + void do_draws_for_alpha(SharedRenderState* render_state, ScopedProfilerNode& prof, DrawMode::AlphaBlend alpha); bool check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket); void final_vertex_update(); bool handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket); @@ -130,7 +131,6 @@ class Generic2 : public BucketRenderer { u32 handle_fragments_after_unpack_v4_32(const u8* data, u32 off, u32 first_unpack_bytes, - u32 next_bucket, u32 end_of_vif, Fragment* frag, bool loop); @@ -171,6 +171,9 @@ class Generic2 : public BucketRenderer { u32 dma_tags = 0; } m_stats; + static constexpr int ALPHA_MODE_COUNT = 6; + bool m_alpha_draw_enable[ALPHA_MODE_COUNT] = {true, true, true, true, true, true}; + struct { GLuint vao; GLuint vertex_buffer; diff --git a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp index f56e2578a6..ccc6fc6776 100644 --- 
a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp @@ -9,8 +9,8 @@ void Generic2::setup_draws() { return; } m_gs = GsState(); - determine_draw_modes(); link_adgifs_back_to_frags(); + determine_draw_modes(); draws_to_buckets(); process_matrices(); final_vertex_update(); @@ -88,9 +88,33 @@ void Generic2::determine_draw_modes() { current_mode.set_clamp_t_enable(clamp_t); } + std::optional final_alpha; + + // ADGIF 4 if ((u8)ad.alpha_addr == (u32)GsRegisterAddress::ALPHA_1) { - GsAlpha reg(ad.alpha_data); - //if (m_gs.gs_alpha != reg) { + final_alpha = ad.alpha_data; + } else { + ASSERT((u8)ad.alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); + } + + auto& frag = m_fragments[m_adgifs[i].frag]; + u64 bonus_adgif_data[4]; + memcpy(bonus_adgif_data, frag.header + (5 * 16), 4 * sizeof(u64)); + // ADGIF 5 + ASSERT((u8)bonus_adgif_data[1] == (u8)(GsRegisterAddress::TEST_1)); + u64 final_test = bonus_adgif_data[0]; + + // ADGIF 6 + if ((u8)bonus_adgif_data[3] == (u8)(GsRegisterAddress::ALPHA_1)) { + final_alpha = bonus_adgif_data[2]; + } else { + ASSERT((u8)bonus_adgif_data[3] == (u8)(GsRegisterAddress::TEST_1)); + final_test = bonus_adgif_data[2]; + } + + if (final_alpha) { + GsAlpha reg(*final_alpha); + if (m_gs.gs_alpha != reg) { m_gs.gs_alpha = reg; auto a = reg.a_mode(); auto b = reg.b_mode(); @@ -119,9 +143,32 @@ void Generic2::determine_draw_modes() { // lg::error("unsupported blend: a {} b {} c {} d {}", (int)a, (int)b, (int)c, (int)d); // ASSERT(false); } - // } - } else { - ASSERT((u8)ad.alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); + } + } + + { + GsTest reg(final_test); + current_mode.set_at(reg.alpha_test_enable()); + if (reg.alpha_test_enable()) { + switch (reg.alpha_test()) { + case GsTest::AlphaTest::NEVER: + current_mode.set_alpha_test(DrawMode::AlphaTest::NEVER); + break; + case GsTest::AlphaTest::ALWAYS: + current_mode.set_alpha_test(DrawMode::AlphaTest::ALWAYS); + break; + 
case GsTest::AlphaTest::GEQUAL: + current_mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL); + break; + default: + ASSERT(false); + } + } + + current_mode.set_aref(reg.aref()); + current_mode.set_alpha_fail(reg.afail()); + current_mode.set_zt(reg.zte()); + current_mode.set_depth_test(reg.ztest()); } m_adgifs[i].mode = current_mode; @@ -191,23 +238,23 @@ void Generic2::process_matrices() { m_drawing_config.mat_23 = reference_mat[2][3]; m_drawing_config.mat_32 = reference_mat[3][2]; -// ASSERT(reference_mat[0][1] == 0); -// ASSERT(reference_mat[0][2] == 0); -// ASSERT(reference_mat[0][3] == 0); -// ASSERT(reference_mat[1][0] == 0); -// ASSERT(reference_mat[1][2] == 0); -// ASSERT(reference_mat[1][3] == 0); -// ASSERT(reference_mat[2][0] == 0); -// ASSERT(reference_mat[2][1] == 0); -// ASSERT(reference_mat[3][0] == 0); -// ASSERT(reference_mat[3][1] == 0); -// ASSERT(reference_mat[3][3] == 0); -// -// for (u32 i = 0; i < m_next_free_frag; i++) { -// std::array mat; -// memcpy(&mat, m_fragments[i].header, 64); -// ASSERT(mat == reference_mat); -// } + // ASSERT(reference_mat[0][1] == 0); + // ASSERT(reference_mat[0][2] == 0); + // ASSERT(reference_mat[0][3] == 0); + // ASSERT(reference_mat[1][0] == 0); + // ASSERT(reference_mat[1][2] == 0); + // ASSERT(reference_mat[1][3] == 0); + // ASSERT(reference_mat[2][0] == 0); + // ASSERT(reference_mat[2][1] == 0); + // ASSERT(reference_mat[3][0] == 0); + // ASSERT(reference_mat[3][1] == 0); + // ASSERT(reference_mat[3][3] == 0); + // + // for (u32 i = 0; i < m_next_free_frag; i++) { + // std::array mat; + // memcpy(&mat, m_fragments[i].header, 64); + // ASSERT(mat == reference_mat); + // } } /*! 
diff --git a/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp index 9020944a5e..2278d493f6 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_DMA.cpp @@ -154,7 +154,6 @@ u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) { u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data, u32 off, u32 first_unpack_bytes, - u32 next_bucket, u32 end_of_vif, Fragment* frag, bool loop) { @@ -352,7 +351,7 @@ void Generic2::process_dma(DmaFollower& dma, u32 next_bucket) { ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32); u32 unpack_bytes = v1.num * 16; auto& frag = next_frag(); - u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes, next_bucket, + u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes, vif_transfer.size_bytes, &frag, false); if (check_for_end_of_generic_data(dma, next_bucket)) { @@ -373,8 +372,7 @@ void Generic2::process_dma(DmaFollower& dma, u32 next_bucket) { auto& continue_frag = next_frag(); off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16, - next_bucket, vif_transfer.size_bytes, - &continue_frag, true); + vif_transfer.size_bytes, &continue_frag, true); continued_fragment = &continue_frag; ASSERT(off == vif_transfer.size_bytes); // } diff --git a/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp index 1c9334b113..816b123597 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp @@ -150,7 +150,7 @@ void Generic2::setup_opengl_for_draw_mode(const DrawMode& draw_mode, // (Cs - 0) * Ad + Cd glBlendFunc(GL_DST_ALPHA, GL_ONE); glBlendEquation(GL_FUNC_ADD); - color_mult = 0.5; + color_mult = 0.5f; // HACK, should probably be 0.5 } else { 
ASSERT(false); } @@ -245,6 +245,24 @@ void Generic2::setup_opengl_tex(u16 unit, } } +void Generic2::do_draws_for_alpha(SharedRenderState* render_state, + ScopedProfilerNode& prof, + DrawMode::AlphaBlend alpha) { + for (u32 i = 0; i < m_next_free_bucket; i++) { + auto& bucket = m_buckets[i]; + auto& first = m_adgifs[bucket.start]; + if (first.mode.get_alpha_blend() == alpha) { + setup_opengl_for_draw_mode(first.mode, first.fix, render_state); + setup_opengl_tex(0, first.tbp, first.mode.get_filt_enable(), first.mode.get_clamp_s_enable(), + first.mode.get_clamp_t_enable(), render_state); + glDrawElements(GL_TRIANGLES, bucket.idx_count, GL_UNSIGNED_INT, + (void*)(sizeof(u32) * bucket.idx_idx)); + prof.add_draw_call(); + prof.add_tri(bucket.idx_count / 3); + } + } +} + void Generic2::do_draws(SharedRenderState* render_state, ScopedProfilerNode& prof) { glBindVertexArray(m_ogl.vao); glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); @@ -253,24 +271,19 @@ void Generic2::do_draws(SharedRenderState* render_state, ScopedProfilerNode& pro GL_STREAM_DRAW); glBufferData(GL_ARRAY_BUFFER, m_next_free_vert * sizeof(Vertex), m_verts.data(), GL_STREAM_DRAW); - // hack - // glDisable(GL_DEPTH_TEST); - // glDisable(GL_BLEND); - glEnable(GL_PRIMITIVE_RESTART); glPrimitiveRestartIndex(UINT32_MAX); opengl_bind(render_state); + constexpr DrawMode::AlphaBlend alpha_order[ALPHA_MODE_COUNT] = { + DrawMode::AlphaBlend::SRC_SRC_SRC_SRC, DrawMode::AlphaBlend::SRC_DST_SRC_DST, + DrawMode::AlphaBlend::SRC_0_SRC_DST, DrawMode::AlphaBlend::ZERO_SRC_SRC_DST, + DrawMode::AlphaBlend::SRC_DST_FIX_DST, DrawMode::AlphaBlend::SRC_0_DST_DST, + }; - for (u32 i = 0; i < m_next_free_bucket; i++) { - auto& bucket = m_buckets[i]; - auto& first = m_adgifs[bucket.start]; - setup_opengl_for_draw_mode(first.mode, first.fix, render_state); - setup_opengl_tex(0, first.tbp, first.mode.get_filt_enable(), first.mode.get_clamp_s_enable(), - first.mode.get_clamp_t_enable(), render_state); - glDrawElements(GL_TRIANGLES, 
bucket.idx_count, GL_UNSIGNED_INT, - (void*)(sizeof(u32) * bucket.idx_idx)); - prof.add_draw_call(); - prof.add_tri(bucket.idx_count / 3); + for (int i = 0; i < ALPHA_MODE_COUNT; i++) { + if (m_alpha_draw_enable[i]) { + do_draws_for_alpha(render_state, prof, alpha_order[i]); + } } } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/shaders/generic.frag b/game/graphics/opengl_renderer/shaders/generic.frag index d150aeabb4..622a789b11 100644 --- a/game/graphics/opengl_renderer/shaders/generic.frag +++ b/game/graphics/opengl_renderer/shaders/generic.frag @@ -73,6 +73,6 @@ void main() { discard; } if ((tex_info.y & 4u) != 0) { - color.xyz = mix(color.xyz, fog_color.xyz / 255., clamp(fog_color.w * (1 - fog), 0, 1)); + color.xyz = mix(color.xyz, fog_color.xyz / 255., clamp(fog_color.w * (fog), 0, 1)); } } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/shaders/generic.vert b/game/graphics/opengl_renderer/shaders/generic.vert index f6bcd62f22..e385624dbe 100644 --- a/game/graphics/opengl_renderer/shaders/generic.vert +++ b/game/graphics/opengl_renderer/shaders/generic.vert @@ -48,7 +48,7 @@ void main() { float fog1 = -transformed.w + hvdf_offset.w; float fog2 = min(fog1, fog_constants.z); float fog3 = max(fog2, fog_constants.y); - fog = 255; // 255 - fog3; TODO + fog = 1 - (fog3/256); // itof12.xyz vf18, vf22 texture int to float // vu.vf18.itof12(Mask::xyz, vu.vf22); @@ -83,6 +83,6 @@ void main() { // scissoring area adjust gl_Position.y *= 512.0/448.0; - fragment_color = rgba_in; + fragment_color = vec4(rgba_in.x, rgba_in.y, rgba_in.z, rgba_in.w * 2.); tex_info = byte_info.xy; } \ No newline at end of file From 5f25e179dc6ffa69e748d4bdab109a32857bd70c Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 14:43:42 -0500 Subject: [PATCH 08/12] fix hud, strip --- common/math/Vector.h | 6 + common/util/Timer.cpp | 2 +- common/util/Timer.h | 8 +- game/graphics/opengl_renderer/AdgifHandler.h | 1 - 
.../graphics/opengl_renderer/BucketRenderer.h | 13 ++ .../opengl_renderer/DirectRenderer2.cpp | 38 ++--- .../opengl_renderer/DirectRenderer2.h | 7 +- .../opengl_renderer/GenericProgram.cpp | 8 +- .../opengl_renderer/GenericRenderer.cpp | 20 +-- .../opengl_renderer/GenericRenderer.h | 5 +- .../opengl_renderer/OpenGLRenderer.cpp | 135 +++++++++++------- .../graphics/opengl_renderer/OpenGLRenderer.h | 9 +- game/graphics/opengl_renderer/Profiler.h | 2 + .../opengl_renderer/foreground/Generic2.cpp | 16 +-- .../opengl_renderer/foreground/Generic2.h | 24 +++- .../foreground/Generic2_Build.cpp | 95 +++++++----- .../foreground/Generic2_OpenGL.cpp | 39 +++-- .../opengl_renderer/shaders/generic.vert | 3 +- 18 files changed, 266 insertions(+), 165 deletions(-) diff --git a/common/math/Vector.h b/common/math/Vector.h index 1994c294ca..6774f64bbf 100644 --- a/common/math/Vector.h +++ b/common/math/Vector.h @@ -216,6 +216,12 @@ class Vector { Vector xyz() const { return head<3>(); } Vector xy() const { return head<2>(); } + void fill(const T& val) { + for (auto& x : m_data) { + x = val; + } + } + private: T m_data[Size]; }; diff --git a/common/util/Timer.cpp b/common/util/Timer.cpp index 4ac44ab25c..9aa870baf0 100644 --- a/common/util/Timer.cpp +++ b/common/util/Timer.cpp @@ -42,7 +42,7 @@ void Timer::start() { #endif } -int64_t Timer::getNs() { +int64_t Timer::getNs() const { struct timespec now = {}; #ifdef __linux__ clock_gettime(CLOCK_MONOTONIC, &now); diff --git a/common/util/Timer.h b/common/util/Timer.h index 9f4f21a714..074e6ec906 100644 --- a/common/util/Timer.h +++ b/common/util/Timer.h @@ -26,19 +26,19 @@ class Timer { /*! * Get milliseconds elapsed */ - double getMs() { return (double)getNs() / 1.e6; } + double getMs() const { return (double)getNs() / 1.e6; } - double getUs() { return (double)getNs() / 1.e3; } + double getUs() const { return (double)getNs() / 1.e3; } /*! * Get nanoseconds elapsed */ - int64_t getNs(); + int64_t getNs() const; /*! 
* Get seconds elapsed */ - double getSeconds() { return (double)getNs() / 1.e9; } + double getSeconds() const { return (double)getNs() / 1.e9; } struct timespec _startTime = {}; }; diff --git a/game/graphics/opengl_renderer/AdgifHandler.h b/game/graphics/opengl_renderer/AdgifHandler.h index b90959df7f..884e97836e 100644 --- a/game/graphics/opengl_renderer/AdgifHandler.h +++ b/game/graphics/opengl_renderer/AdgifHandler.h @@ -18,7 +18,6 @@ class AdgifHelper { m_alpha = GsAlpha(m_data.alpha_data); } - bool is_normal_adgif() const { return (u8)m_data.tex0_addr == (u32)GsRegisterAddress::TEX0_1 && (u8)m_data.tex1_addr == (u32)GsRegisterAddress::TEX1_1 && diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 581cbd34cc..4d030071de 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -61,6 +61,19 @@ enum class BucketId { MAX_BUCKETS = 69 }; +enum class BucketCategory { + SKY, + TFRAG, + TIE, + TEX, + MERC, + GENERIC_MERC, + SPRITE, + DEBUG_DRAW, + UNUSED, + MAX_CATEGORIES +}; + struct LevelVis { bool valid = false; u8 data[2048]; diff --git a/game/graphics/opengl_renderer/DirectRenderer2.cpp b/game/graphics/opengl_renderer/DirectRenderer2.cpp index 6be8c3cabc..7f4d31533c 100644 --- a/game/graphics/opengl_renderer/DirectRenderer2.cpp +++ b/game/graphics/opengl_renderer/DirectRenderer2.cpp @@ -87,7 +87,6 @@ void DirectRenderer2::reset_buffers() { m_vertices.next_index = 0; m_vertices.next_vertex = 0; m_state.next_vertex_starts_strip = true; - m_state.strip_warmup = 0; m_current_state_has_open_draw = false; } @@ -168,14 +167,16 @@ void DirectRenderer2::draw_call_loop_simple(SharedRenderState* render_state, } else { end_idx = m_draw_buffer[draw_idx + 1].start_index; } - glDrawElements(GL_TRIANGLES, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset); + glDrawElements(GL_TRIANGLE_STRIP, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset); 
prof.add_draw_call(); - prof.add_tri((end_idx - draw.start_index) / 3); + prof.add_tri((end_idx - draw.start_index) - 2); } } void DirectRenderer2::draw_call_loop_grouped(SharedRenderState* render_state, ScopedProfilerNode& prof) { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(UINT32_MAX); u32 draw_idx = 0; while (draw_idx < m_next_free_draw) { const auto& draw = m_draw_buffer[draw_idx]; @@ -213,7 +214,7 @@ void DirectRenderer2::draw_call_loop_grouped(SharedRenderState* render_state, // fmt::print("drawing {:4d} with abe {} tex {} {}", end_idx - draw.start_index, // (int)draw.mode.get_ab_enable(), end_of_draw_group - draw_idx, draw.to_single_line_string() ); // fmt::print("{}\n", draw.mode.to_string()); - glDrawElements(GL_TRIANGLES, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset); + glDrawElements(GL_TRIANGLE_STRIP, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset); prof.add_draw_call(); prof.add_tri((end_idx - draw.start_index) / 3); draw_idx = end_of_draw_group + 1; @@ -680,16 +681,18 @@ void DirectRenderer2::handle_xyzf2_packed(const u8* data, if (m_state.next_vertex_starts_strip) { m_state.next_vertex_starts_strip = false; - m_state.strip_warmup = 0; + m_vertices.indices[m_vertices.next_index++] = UINT32_MAX; } // push the vertex auto& vert = m_vertices.vertices[m_vertices.next_vertex++]; - m_state.strip_warmup++; - if (adc && m_state.strip_warmup >= 3) { - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 1; - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 2; - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 3; + auto vidx = m_vertices.next_vertex - 1; + if (adc) { + m_vertices.indices[m_vertices.next_index++] = vidx; + } else { + m_vertices.indices[m_vertices.next_index++] = UINT32_MAX; + m_vertices.indices[m_vertices.next_index++] = vidx - 1; + m_vertices.indices[m_vertices.next_index++] = vidx; } if (!m_current_state_has_open_draw) { @@ -745,16 +748,19 
@@ void DirectRenderer2::handle_xyzf2_mod_packed(const u8* data, if (m_state.next_vertex_starts_strip) { m_state.next_vertex_starts_strip = false; - m_state.strip_warmup = 0; + m_vertices.indices[m_vertices.next_index++] = UINT32_MAX; } // push the vertex auto& vert = m_vertices.vertices[m_vertices.next_vertex++]; - m_state.strip_warmup++; - if (adc && m_state.strip_warmup >= 3) { - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 1; - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 2; - m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 3; + + auto vidx = m_vertices.next_vertex - 1; + if (adc) { + m_vertices.indices[m_vertices.next_index++] = vidx; + } else { + m_vertices.indices[m_vertices.next_index++] = UINT32_MAX; + m_vertices.indices[m_vertices.next_index++] = vidx - 1; + m_vertices.indices[m_vertices.next_index++] = vidx; } if (!m_current_state_has_open_draw) { diff --git a/game/graphics/opengl_renderer/DirectRenderer2.h b/game/graphics/opengl_renderer/DirectRenderer2.h index 29369225f0..c99b46905a 100644 --- a/game/graphics/opengl_renderer/DirectRenderer2.h +++ b/game/graphics/opengl_renderer/DirectRenderer2.h @@ -7,7 +7,11 @@ class DirectRenderer2 { public: - DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, const std::string& name, bool use_ftoi_mod); + DirectRenderer2(u32 max_verts, + u32 max_inds, + u32 max_draws, + const std::string& name, + bool use_ftoi_mod); void init_shaders(ShaderLibrary& shaders); void reset_state(); void render_gif_data(const u8* data, SharedRenderState* render_state, ScopedProfilerNode& prof); @@ -35,7 +39,6 @@ class DirectRenderer2 { float s, t, Q; math::Vector rgba; bool next_vertex_starts_strip = true; - u32 strip_warmup = 0; u8 vertex_flags = 0; void set_tcc_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 1; } void set_decal_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 2; } diff --git 
a/game/graphics/opengl_renderer/GenericProgram.cpp b/game/graphics/opengl_renderer/GenericProgram.cpp index 54a987ec82..3ac9886654 100644 --- a/game/graphics/opengl_renderer/GenericProgram.cpp +++ b/game/graphics/opengl_renderer/GenericProgram.cpp @@ -772,7 +772,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf31.mfir(Mask::z, vu.vi07); if (!clipping_hack && bc) { goto L49; } - L48: + // L48: // div Q, vf01.x, vf14.w | nop 524 vu.Q = gen.fog.x() / gen.vtx_p2.w(); // lq.xyzw vf23, 998(vi00) | nop 525 @@ -844,7 +844,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf31.mfir(Mask::z, vu.vi07); if (!clipping_hack && bc) { goto L53; } - L52: + // L52: // div Q, vf01.x, vf15.w | nop 607 vu.Q = gen.fog.x() / gen.vtx_p3.w(); // lq.xyzw vf23, 998(vi00) | nop 608 @@ -916,7 +916,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf31.mfir(Mask::z, vu.vi07); if (!clipping_hack && bc) { goto L57; } - L56: + // L56: // div Q, vf01.x, vf12.w | nop 690 vu.Q = gen.fog.x() / gen.vtx_p0.w(); // lq.xyzw vf23, 998(vi00) | nop 691 @@ -988,7 +988,7 @@ void GenericRenderer::mscal_dispatch(int imm, SharedRenderState* render_state, S vu.vf31.mfir(Mask::z, vu.vi07); if (!clipping_hack && bc) { goto L61; } - L60: + // L60: // div Q, vf01.x, vf13.w | nop 773 vu.Q = gen.fog.x() / gen.vtx_p1.w(); // lq.xyzw vf23, 998(vi00) | nop 774 diff --git a/game/graphics/opengl_renderer/GenericRenderer.cpp b/game/graphics/opengl_renderer/GenericRenderer.cpp index 7c0353849e..1f66576c09 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.cpp +++ b/game/graphics/opengl_renderer/GenericRenderer.cpp @@ -3,9 +3,8 @@ GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id), - m_direct(name, my_id, 0x30000), m_direct2(30000, 60000, 1000, name, true), - m_debug_gen2(name, my_id, 50000, 1000, 1000, 800) {} + m_debug_gen2(name, my_id, 1500000, 
10000, 3000, 800) {} void GenericRenderer::init_shaders(ShaderLibrary& shaders) { m_direct2.init_shaders(shaders); @@ -22,7 +21,6 @@ void GenericRenderer::render(DmaFollower& dma, m_xgkick_idx = 0; m_skipped_tags = 0; m_debug.clear(); - m_direct.reset_state(); // if the first draw should have no blending, it sets ABE in PRIM, but not ALPHA. // the default ALPHA doesn't seem to be right. I don't know what's supposed to set it here. @@ -38,9 +36,6 @@ void GenericRenderer::render(DmaFollower& dma, return; } - // todo remove - DmaFollower gen2_follower = dma; - while (dma.current_tag_offset() != render_state->next_bucket) { auto data = dma.read_and_advance(); m_debug += fmt::format("{} : {} {}\n", data.size_bytes, data.vifcode0().print(), @@ -75,15 +70,11 @@ void GenericRenderer::render(DmaFollower& dma, } else if (v0.kind == VifCode::Kind::FLUSHA && v1.kind == VifCode::Kind::DIRECT) { if (render_state->use_direct2) { m_direct2.render_gif_data(data.data, render_state, prof); - } else { - m_direct.render_gif(data.data, data.size_bytes, render_state, prof); } ASSERT(v1.immediate == data.size_bytes / 16); } else if (v0.kind == VifCode::Kind::NOP && v1.kind == VifCode::Kind::DIRECT) { if (render_state->use_direct2) { m_direct2.render_gif_data(data.data, render_state, prof); - } else { - m_direct.render_gif(data.data, data.size_bytes, render_state, prof); } ASSERT(v1.immediate == data.size_bytes / 16); } else if (v0.kind == VifCode::Kind::STCYCL && v1.kind == VifCode::Kind::UNPACK_V4_32) { @@ -148,8 +139,6 @@ void GenericRenderer::render(DmaFollower& dma, } if (render_state->use_direct2) { m_direct2.flush_pending(render_state, prof); - } else { - m_direct.flush_pending(render_state, prof); } } @@ -202,7 +191,6 @@ void GenericRenderer::handle_dma_stream(const u8* data, } void GenericRenderer::draw_debug_window() { - ImGui::Text("Skipped %d tags", m_skipped_tags); ImGui::InputInt("kick min", &m_min_xgkick); ImGui::InputInt("kick max", &m_max_xgkick); @@ -211,10 +199,6 @@ void 
GenericRenderer::draw_debug_window() { m_debug_gen2.draw_debug_window(); ImGui::TreePop(); } - if (ImGui::TreeNode("Direct")) { - m_direct.draw_debug_window(); - ImGui::TreePop(); - } } u32 GenericRenderer::unpack32_4(const VifCodeUnpack& up, const u8* data, u32 imm) { @@ -332,8 +316,6 @@ void GenericRenderer::xgkick(u16 addr, SharedRenderState* render_state, ScopedPr if (!render_state->use_generic2) { if (render_state->use_direct2) { m_direct2.render_gif_data(m_buffer.data + (16 * addr), render_state, prof); - } else { - m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof); } } } diff --git a/game/graphics/opengl_renderer/GenericRenderer.h b/game/graphics/opengl_renderer/GenericRenderer.h index 02b67cc502..ce994ffa08 100644 --- a/game/graphics/opengl_renderer/GenericRenderer.h +++ b/game/graphics/opengl_renderer/GenericRenderer.h @@ -1,7 +1,6 @@ #pragma once #include "game/graphics/opengl_renderer/BucketRenderer.h" -#include "game/graphics/opengl_renderer/DirectRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer2.h" #include "game/graphics/opengl_renderer/foreground/Generic2.h" #include "game/common/vu.h" @@ -42,7 +41,6 @@ class GenericRenderer : public BucketRenderer { } int m_skipped_tags = 0; - DirectRenderer m_direct; DirectRenderer2 m_direct2; Generic2 m_debug_gen2; std::string m_debug; @@ -50,8 +48,7 @@ class GenericRenderer : public BucketRenderer { struct Vu { u32 row[4]; u32 stcycl = 0; - Vf vf03, vf18, vf19, vf20, vf21, vf22, vf23, vf24, vf25, vf26, vf27, vf28, vf29, - vf30, vf31; + Vf vf03, vf18, vf19, vf20, vf21, vf22, vf23, vf24, vf25, vf26, vf27, vf28, vf29, vf30, vf31; const Vf vf00; u16 vi01, vi02, vi03, vi04, vi05, vi06, vi07, vi09, vi08, vi11, vi12, vi13, vi10, vi14, vi15; float I, P, Q; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 5cf07202e3..5a6c9c35f4 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ 
b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -69,6 +69,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr texture_pool, * Construct bucket renderers. We can specify different renderers for different buckets */ void OpenGLRenderer::init_bucket_renderers() { + m_bucket_categories.fill(BucketCategory::UNUSED); std::vector normal_tfrags = {tfrag3::TFragmentTreeKind::NORMAL, tfrag3::TFragmentTreeKind::LOWRES}; std::vector dirt_tfrags = {tfrag3::TFragmentTreeKind::DIRT}; @@ -82,37 +83,45 @@ void OpenGLRenderer::init_bucket_renderers() { // 0 // 1 // 2 - init_bucket_renderer("sky", BucketId::SKY_DRAW); // 3 + init_bucket_renderer("sky", BucketCategory::SKY, BucketId::SKY_DRAW); // 3 // 4 //----------------------- // LEVEL 0 tfrag texture //----------------------- - init_bucket_renderer("l0-tfrag-tex", BucketId::TFRAG_TEX_LEVEL0); // 5 - init_bucket_renderer("l0-tfrag-tfrag", BucketId::TFRAG_LEVEL0, normal_tfrags, false, + init_bucket_renderer("l0-tfrag-tex", BucketCategory::TEX, + BucketId::TFRAG_TEX_LEVEL0); // 5 + init_bucket_renderer("l0-tfrag-tfrag", BucketCategory::TFRAG, BucketId::TFRAG_LEVEL0, + normal_tfrags, false, 0); // 6 // 7 // 8 - init_bucket_renderer("l0-tfrag-tie", BucketId::TIE_LEVEL0, 0); // 9 - init_bucket_renderer("l0-tfrag-merc", BucketId::MERC_TFRAG_TEX_LEVEL0); // 10 - init_bucket_renderer("l0-tfrag-gmerc", BucketId::GMERC_TFRAG_TEX_LEVEL0); // 11 + init_bucket_renderer("l0-tfrag-tie", BucketCategory::TIE, BucketId::TIE_LEVEL0, 0); // 9 + init_bucket_renderer("l0-tfrag-merc", BucketCategory::MERC, + BucketId::MERC_TFRAG_TEX_LEVEL0); // 10 + init_bucket_renderer("l0-tfrag-gmerc", BucketCategory::GENERIC_MERC, + BucketId::GMERC_TFRAG_TEX_LEVEL0); // 11 //----------------------- // LEVEL 1 tfrag texture //----------------------- - init_bucket_renderer("l1-tfrag-tex", BucketId::TFRAG_TEX_LEVEL1); // 12 - init_bucket_renderer("l1-tfrag-tfrag", BucketId::TFRAG_LEVEL1, normal_tfrags, false, - 1); + init_bucket_renderer("l1-tfrag-tex", 
BucketCategory::TEX, + BucketId::TFRAG_TEX_LEVEL1); // 12 + init_bucket_renderer("l1-tfrag-tfrag", BucketCategory::TFRAG, BucketId::TFRAG_LEVEL1, + normal_tfrags, false, 1); // 14 // 15 - init_bucket_renderer("l1-tfrag-tie", BucketId::TIE_LEVEL1, 1); - init_bucket_renderer("l1-tfrag-merc", BucketId::MERC_TFRAG_TEX_LEVEL1); // 17 - init_bucket_renderer("l1-tfrag-gmerc", BucketId::GMERC_TFRAG_TEX_LEVEL1); // 18 + init_bucket_renderer("l1-tfrag-tie", BucketCategory::TIE, BucketId::TIE_LEVEL1, 1); + init_bucket_renderer("l1-tfrag-merc", BucketCategory::MERC, + BucketId::MERC_TFRAG_TEX_LEVEL1); // 17 + init_bucket_renderer("l1-tfrag-gmerc", BucketCategory::GENERIC_MERC, + BucketId::GMERC_TFRAG_TEX_LEVEL1); // 18 //----------------------- // LEVEL 0 shrub texture //----------------------- - init_bucket_renderer("l0-shrub-tex", BucketId::SHRUB_TEX_LEVEL0); // 19 + init_bucket_renderer("l0-shrub-tex", BucketCategory::TEX, + BucketId::SHRUB_TEX_LEVEL0); // 19 // 20 // 21 // 22 @@ -122,84 +131,105 @@ void OpenGLRenderer::init_bucket_renderers() { //----------------------- // LEVEL 1 shrub texture //----------------------- - init_bucket_renderer("l1-shrub-tex", BucketId::SHRUB_TEX_LEVEL1); // 25 + init_bucket_renderer("l1-shrub-tex", BucketCategory::TEX, + BucketId::SHRUB_TEX_LEVEL1); // 25 // 26 // 27 // 28 // 29 // I don't think this is actually used? or it might be wrong. 
- init_bucket_renderer("common-shrub-generic", BucketId::GENERIC_SHRUB); // 30 + init_bucket_renderer("common-shrub-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_SHRUB); // 30 //----------------------- // LEVEL 0 alpha texture //----------------------- - init_bucket_renderer("l0-alpha-tex", BucketId::ALPHA_TEX_LEVEL0); // 31 - init_bucket_renderer("l0-alpha-sky-blend-and-tfrag-trans", + init_bucket_renderer("l0-alpha-tex", BucketCategory::TEX, + BucketId::ALPHA_TEX_LEVEL0); // 31 + init_bucket_renderer("l0-alpha-sky-blend-and-tfrag-trans", BucketCategory::SKY, BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0, 0, sky_gpu_blender, sky_cpu_blender); // 32 // 33 - init_bucket_renderer("l0-alpha-tfrag", BucketId::TFRAG_DIRT_LEVEL0, dirt_tfrags, false, + init_bucket_renderer("l0-alpha-tfrag", BucketCategory::TFRAG, + BucketId::TFRAG_DIRT_LEVEL0, dirt_tfrags, false, 0); // 34 // 35 - init_bucket_renderer("l0-alpha-tfrag-ice", BucketId::TFRAG_ICE_LEVEL0, ice_tfrags, - false, 0); + init_bucket_renderer("l0-alpha-tfrag-ice", BucketCategory::TFRAG, + BucketId::TFRAG_ICE_LEVEL0, ice_tfrags, false, 0); // 37 //----------------------- // LEVEL 1 alpha texture //----------------------- - init_bucket_renderer("l1-alpha-tex", BucketId::ALPHA_TEX_LEVEL1); // 38 - init_bucket_renderer("l1-alpha-sky-blend-and-tfrag-trans", + init_bucket_renderer("l1-alpha-tex", BucketCategory::TEX, + BucketId::ALPHA_TEX_LEVEL1); // 38 + init_bucket_renderer("l1-alpha-sky-blend-and-tfrag-trans", BucketCategory::SKY, BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1, 1, sky_gpu_blender, sky_cpu_blender); // 39 // 40 - init_bucket_renderer("l1-alpha-tfrag-dirt", BucketId::TFRAG_DIRT_LEVEL1, dirt_tfrags, - false, + init_bucket_renderer("l1-alpha-tfrag-dirt", BucketCategory::TFRAG, + BucketId::TFRAG_DIRT_LEVEL1, dirt_tfrags, false, 1); // 41 // 42 - init_bucket_renderer("l1-alpha-tfrag-ice", BucketId::TFRAG_ICE_LEVEL1, ice_tfrags, - false, 1); + init_bucket_renderer("l1-alpha-tfrag-ice", 
BucketCategory::TFRAG, + BucketId::TFRAG_ICE_LEVEL1, ice_tfrags, false, 1); // 44 - init_bucket_renderer("common-alpha-merc", BucketId::MERC_AFTER_ALPHA); + init_bucket_renderer("common-alpha-merc", BucketCategory::MERC, + BucketId::MERC_AFTER_ALPHA); - init_bucket_renderer("common-alpha-generic", BucketId::GENERIC_ALPHA); // 46 + init_bucket_renderer("common-alpha-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_ALPHA); // 46 // 47? //----------------------- // LEVEL 0 pris texture //----------------------- - init_bucket_renderer("l0-pris-tex", BucketId::PRIS_TEX_LEVEL0); // 48 - init_bucket_renderer("l0-pris-merc", BucketId::MERC_PRIS_LEVEL0); // 49 - init_bucket_renderer("l0-pris-generic", BucketId::GENERIC_PRIS_LEVEL0); // 50 + init_bucket_renderer("l0-pris-tex", BucketCategory::TEX, + BucketId::PRIS_TEX_LEVEL0); // 48 + init_bucket_renderer("l0-pris-merc", BucketCategory::MERC, + BucketId::MERC_PRIS_LEVEL0); // 49 + init_bucket_renderer("l0-pris-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_PRIS_LEVEL0); // 50 //----------------------- // LEVEL 1 pris texture //----------------------- - init_bucket_renderer("l1-pris-tex", BucketId::PRIS_TEX_LEVEL1); // 51 - init_bucket_renderer("l1-pris-merc", BucketId::MERC_PRIS_LEVEL1); // 52 - init_bucket_renderer("l1-pris-generic", BucketId::GENERIC_PRIS_LEVEL1); // 53 + init_bucket_renderer("l1-pris-tex", BucketCategory::TEX, + BucketId::PRIS_TEX_LEVEL1); // 51 + init_bucket_renderer("l1-pris-merc", BucketCategory::MERC, + BucketId::MERC_PRIS_LEVEL1); // 52 + init_bucket_renderer("l1-pris-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_PRIS_LEVEL1); // 53 // other renderers may output to the eye renderer - m_render_state.eye_renderer = - init_bucket_renderer("common-pris-eyes", BucketId::MERC_EYES_AFTER_PRIS); // 54 - init_bucket_renderer("common-pris-merc", BucketId::MERC_AFTER_PRIS); // 55 - init_bucket_renderer("common-pris-generic", BucketId::GENERIC_PRIS); // 56 + 
m_render_state.eye_renderer = init_bucket_renderer( + "common-pris-eyes", BucketCategory::SKY, BucketId::MERC_EYES_AFTER_PRIS); // 54 + init_bucket_renderer("common-pris-merc", BucketCategory::MERC, + BucketId::MERC_AFTER_PRIS); // 55 + init_bucket_renderer("common-pris-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_PRIS); // 56 //----------------------- // LEVEL 0 water texture //----------------------- - init_bucket_renderer("l0-water-tex", BucketId::WATER_TEX_LEVEL0); // 57 - init_bucket_renderer("l0-water-merc", BucketId::MERC_WATER_LEVEL0); // 58 - init_bucket_renderer("l0-water-generic", BucketId::GENERIC_WATER_LEVEL0); // 59 + init_bucket_renderer("l0-water-tex", BucketCategory::TEX, + BucketId::WATER_TEX_LEVEL0); // 57 + init_bucket_renderer("l0-water-merc", BucketCategory::MERC, + BucketId::MERC_WATER_LEVEL0); // 58 + init_bucket_renderer("l0-water-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_WATER_LEVEL0); // 59 //----------------------- // LEVEL 1 water texture //----------------------- - init_bucket_renderer("l1-water-tex", BucketId::WATER_TEX_LEVEL1); // 60 - init_bucket_renderer("l1-water-merc", BucketId::MERC_WATER_LEVEL1); // 61 - init_bucket_renderer("l1-water-generic", BucketId::GENERIC_WATER_LEVEL1); // 62 + init_bucket_renderer("l1-water-tex", BucketCategory::TEX, + BucketId::WATER_TEX_LEVEL1); // 60 + init_bucket_renderer("l1-water-merc", BucketCategory::MERC, + BucketId::MERC_WATER_LEVEL1); // 61 + init_bucket_renderer("l1-water-generic", BucketCategory::GENERIC_MERC, + BucketId::GENERIC_WATER_LEVEL1); // 62 // 63? // 64? @@ -207,21 +237,26 @@ void OpenGLRenderer::init_bucket_renderers() { //----------------------- // COMMON texture //----------------------- - init_bucket_renderer("common-tex", BucketId::PRE_SPRITE_TEX); // 65 + init_bucket_renderer("common-tex", BucketCategory::TEX, + BucketId::PRE_SPRITE_TEX); // 65 std::vector> sprite_renderers; // the first renderer added will be the default for sprite. 
sprite_renderers.push_back(std::make_unique("sprite-3", BucketId::SPRITE)); sprite_renderers.push_back(std::make_unique("sprite-renderer", BucketId::SPRITE)); - init_bucket_renderer("sprite", BucketId::SPRITE, std::move(sprite_renderers)); // 66 + init_bucket_renderer("sprite", BucketCategory::SPRITE, BucketId::SPRITE, + std::move(sprite_renderers)); // 66 - init_bucket_renderer("debug-draw-0", BucketId::DEBUG_DRAW_0, 0x20000); - init_bucket_renderer("debug-draw-1", BucketId::DEBUG_DRAW_1, 0x8000); + init_bucket_renderer("debug-draw-0", BucketCategory::DEBUG_DRAW, + BucketId::DEBUG_DRAW_0, 0x20000); + init_bucket_renderer("debug-draw-1", BucketCategory::DEBUG_DRAW, + BucketId::DEBUG_DRAW_1, 0x8000); // for now, for any unset renderers, just set them to an EmptyBucketRenderer. for (size_t i = 0; i < m_bucket_renderers.size(); i++) { if (!m_bucket_renderers[i]) { - init_bucket_renderer(fmt::format("bucket{}", i), (BucketId)i); + init_bucket_renderer(fmt::format("bucket{}", i), BucketCategory::UNUSED, + (BucketId)i); } m_bucket_renderers[i]->init_shaders(m_render_state.shaders); @@ -342,6 +377,7 @@ void OpenGLRenderer::setup_frame(int window_width_px, void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof) { // The first thing the DMA chain should be a call to a common default-registers chain. // this chain resets the state of the GS. 
After this is buckets + m_category_times.fill(0); m_render_state.buckets_base = dma.current_tag_offset() + 16; // offset by 1 qw for the initial call @@ -381,6 +417,7 @@ void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof) ASSERT(dma.current_tag_offset() == m_render_state.next_bucket); m_render_state.next_bucket += 16; vif_interrupt_callback(); + m_category_times[(int)m_bucket_categories[bucket_id]] += bucket_prof.get_elapsed_time(); } g_current_render = ""; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.h b/game/graphics/opengl_renderer/OpenGLRenderer.h index be7cd6459a..ad0f624d21 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.h +++ b/game/graphics/opengl_renderer/OpenGLRenderer.h @@ -36,10 +36,14 @@ class OpenGLRenderer { void finish_screenshot(const std::string& output_name, int px, int py, int x, int y); template - T* init_bucket_renderer(const std::string& name, BucketId id, Args&&... args) { + T* init_bucket_renderer(const std::string& name, + BucketCategory cat, + BucketId id, + Args&&... 
args) { auto renderer = std::make_unique(name, id, std::forward(args)...); T* ret = renderer.get(); m_bucket_renderers.at((int)id) = std::move(renderer); + m_bucket_categories.at((int)id) = cat; return ret; } @@ -47,4 +51,7 @@ class OpenGLRenderer { Profiler m_profiler; std::array, (int)BucketId::MAX_BUCKETS> m_bucket_renderers; + std::array m_bucket_categories; + + std::array m_category_times; }; diff --git a/game/graphics/opengl_renderer/Profiler.h b/game/graphics/opengl_renderer/Profiler.h index e99751c25b..df8af9a1e0 100644 --- a/game/graphics/opengl_renderer/Profiler.h +++ b/game/graphics/opengl_renderer/Profiler.h @@ -34,6 +34,7 @@ class ProfilerNode { void add_draw_call(int count = 1) { m_stats.draw_calls += count; } void add_tri(int count = 1) { m_stats.triangles += count; } + float get_elapsed_time() const { return m_timer.getSeconds(); } private: friend class Profiler; @@ -59,6 +60,7 @@ class ScopedProfilerNode { void add_draw_call(int count = 1) { m_node->add_draw_call(count); } void add_tri(int count = 1) { m_node->add_tri(count); } + float get_elapsed_time() const { return m_node->get_elapsed_time(); } private: ProfilerNode* m_node; diff --git a/game/graphics/opengl_renderer/foreground/Generic2.cpp b/game/graphics/opengl_renderer/foreground/Generic2.cpp index 3af700372e..e9d8b42899 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2.cpp @@ -56,34 +56,20 @@ void Generic2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedP // Generic2 has 3 passes. 
{ // our first pass is to go over the DMA chain from the game and extract the data into buffers - Timer proc_dma_timer; auto p = prof.make_scoped_child("dma"); process_dma(dma, render_state->next_bucket); - if (m_next_free_vert > 10000) { - // fmt::print("dma: {} in {:.3f} ms\n", m_next_free_vert, proc_dma_timer.getMs()); - } } { // the next pass is to look at all of that data, and figure out the best order to draw it // using OpenGL - Timer setup_timer; auto p = prof.make_scoped_child("setup"); setup_draws(); - if (m_next_free_vert > 10000) { -// fmt::print("setup: {} buckets, {} adgifs {} indices in {:.3f} ms\n", m_next_free_bucket, -// m_next_free_adgif, m_next_free_idx, setup_timer.getMs()); - } } { // the final pass is the actual drawing. - Timer draw_timer; auto p = prof.make_scoped_child("drawing"); - do_draws(render_state, prof); - if (m_next_free_vert > 10000) { - // fmt::print("draw {:.3f} ms\n", draw_timer.getMs()); - } - + do_draws(render_state, p); } } diff --git a/game/graphics/opengl_renderer/foreground/Generic2.h b/game/graphics/opengl_renderer/foreground/Generic2.h index bbbba451ca..c6c19214f3 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2.h +++ b/game/graphics/opengl_renderer/foreground/Generic2.h @@ -38,14 +38,17 @@ class Generic2 : public BucketRenderer { void process_dma(DmaFollower& dma, u32 next_bucket); void setup_draws(); void do_draws(SharedRenderState* render_state, ScopedProfilerNode& prof); - void do_draws_for_alpha(SharedRenderState* render_state, ScopedProfilerNode& prof, DrawMode::AlphaBlend alpha); + void do_draws_for_alpha(SharedRenderState* render_state, + ScopedProfilerNode& prof, + DrawMode::AlphaBlend alpha, + bool hud); bool check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket); void final_vertex_update(); bool handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket); void opengl_setup(); void opengl_cleanup(); - void opengl_bind(SharedRenderState* render_state); + void 
opengl_bind_and_setup_proj(SharedRenderState* render_state); void setup_opengl_for_draw_mode(const DrawMode& draw_mode, u8 fix, SharedRenderState* render_state); @@ -67,8 +70,13 @@ class Generic2 : public BucketRenderer { math::Vector4f hvdf_offset; float pfog0; // scale factor for perspective divide float fog_min, fog_max; // clamp for fog - math::Vector3f scale; - float mat_23, mat_32; + math::Vector3f proj_scale; + float proj_mat_23, proj_mat_32; + + math::Vector3f hud_scale; + float hud_mat_23, hud_mat_32, hud_mat_33; + + bool uses_hud = false; } m_drawing_config; struct GsState { @@ -96,6 +104,7 @@ class Generic2 : public BucketRenderer { u32 vtx_idx = 0; u32 vtx_count = 0; u8 mscal_addr = 0; + bool uses_hud; }; struct Adgif { @@ -107,6 +116,7 @@ class Generic2 : public BucketRenderer { u32 frag; u32 vtx_idx; u32 vtx_count; + bool uses_hud; u32 next = -2; @@ -114,6 +124,7 @@ class Generic2 : public BucketRenderer { u64 result = mode.as_int(); result |= (((u64)tbp) << 32); result |= (((u64)fix) << 48); + result |= (((u64)uses_hud ? 
1ull : 0ull) << 62); return result; } }; @@ -126,6 +137,8 @@ class Generic2 : public BucketRenderer { u32 idx_idx; u32 idx_count; + + u32 tri_count; // just for debug }; u32 handle_fragments_after_unpack_v4_32(const u8* data, @@ -178,6 +191,7 @@ class Generic2 : public BucketRenderer { GLuint vao; GLuint vertex_buffer; GLuint index_buffer; - GLuint alpha_reject, color_mult, fog_color, scale, mat_23, mat_32, fog_consts, hvdf_offset; + GLuint alpha_reject, color_mult, fog_color, scale, mat_23, mat_32, mat_33, fog_consts, + hvdf_offset; } m_ogl; }; diff --git a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp index ccc6fc6776..6f0cd36036 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_Build.cpp @@ -10,9 +10,9 @@ void Generic2::setup_draws() { } m_gs = GsState(); link_adgifs_back_to_frags(); + process_matrices(); determine_draw_modes(); draws_to_buckets(); - process_matrices(); final_vertex_update(); build_index_buffer(); } @@ -48,6 +48,8 @@ void Generic2::determine_draw_modes() { // iterate over all adgifs for (u32 i = 0; i < m_next_free_adgif; i++) { auto& ad = m_adgifs[i].data; + auto& frag = m_fragments[m_adgifs[i].frag]; + m_adgifs[i].uses_hud = frag.uses_hud; // ADGIF 0 ASSERT((u8)ad.tex0_addr == (u32)GsRegisterAddress::TEX0_1); @@ -97,7 +99,6 @@ void Generic2::determine_draw_modes() { ASSERT((u8)ad.alpha_addr == (u32)GsRegisterAddress::MIPTBP2_1); } - auto& frag = m_fragments[m_adgifs[i].frag]; u64 bonus_adgif_data[4]; memcpy(bonus_adgif_data, frag.header + (5 * 16), 4 * sizeof(u64)); // ADGIF 5 @@ -230,31 +231,59 @@ void Generic2::draws_to_buckets() { * I don't think this will hold for TIE... 
*/ void Generic2::process_matrices() { - std::array reference_mat; - memcpy(&reference_mat, m_fragments[0].header, 64); - m_drawing_config.scale[0] = reference_mat[0][0]; - m_drawing_config.scale[1] = reference_mat[1][1]; - m_drawing_config.scale[2] = reference_mat[2][2]; - m_drawing_config.mat_23 = reference_mat[2][3]; - m_drawing_config.mat_32 = reference_mat[3][2]; + // first, we need to find the projection matrix. + // most of the time, it's first. If you have the hud open, there may be a few others. + bool found_proj_matrix = false; + std::array projection_matrix, hud_matrix; + for (u32 i = 0; i < m_next_free_frag; i++) { + float mat_33; + memcpy(&mat_33, m_fragments[i].header + 15 * sizeof(float), sizeof(float)); + if (mat_33 == 0) { + // got it. + memcpy(&projection_matrix, m_fragments[i].header, 64); + found_proj_matrix = true; + break; + } + } + + if (!found_proj_matrix) { + for (auto& row : projection_matrix) { + row.fill(0); + } + } + + // mark as hud/proj + bool found_hud_matrix = false; + for (u32 i = 0; i < m_next_free_frag; i++) { + float mat_33; + memcpy(&mat_33, m_fragments[i].header + 15 * sizeof(float), sizeof(float)); + if (mat_33 == 0) { + m_fragments[i].uses_hud = false; + } else { + m_fragments[i].uses_hud = true; + if (!found_hud_matrix) { + found_hud_matrix = true; + memcpy(&hud_matrix, m_fragments[i].header, 64); + } + } + } + + m_drawing_config.proj_scale[0] = projection_matrix[0][0]; + m_drawing_config.proj_scale[1] = projection_matrix[1][1]; + m_drawing_config.proj_scale[2] = projection_matrix[2][2]; + m_drawing_config.proj_mat_23 = projection_matrix[2][3]; + m_drawing_config.proj_mat_32 = projection_matrix[3][2]; - // ASSERT(reference_mat[0][1] == 0); - // ASSERT(reference_mat[0][2] == 0); - // ASSERT(reference_mat[0][3] == 0); - // ASSERT(reference_mat[1][0] == 0); - // ASSERT(reference_mat[1][2] == 0); - // ASSERT(reference_mat[1][3] == 0); - // ASSERT(reference_mat[2][0] == 0); - // ASSERT(reference_mat[2][1] == 0); - // 
ASSERT(reference_mat[3][0] == 0); - // ASSERT(reference_mat[3][1] == 0); - // ASSERT(reference_mat[3][3] == 0); - // - // for (u32 i = 0; i < m_next_free_frag; i++) { - // std::array mat; - // memcpy(&mat, m_fragments[i].header, 64); - // ASSERT(mat == reference_mat); - // } + if (found_hud_matrix) { + m_drawing_config.hud_scale[0] = hud_matrix[0][0]; + m_drawing_config.hud_scale[1] = hud_matrix[1][1]; + m_drawing_config.hud_scale[2] = hud_matrix[2][2]; + m_drawing_config.hud_mat_23 = hud_matrix[2][3]; + m_drawing_config.hud_mat_32 = hud_matrix[3][2]; + m_drawing_config.hud_mat_33 = hud_matrix[3][3]; + } + + m_drawing_config.uses_hud = found_hud_matrix; } /*! @@ -272,27 +301,29 @@ void Generic2::final_vertex_update() { /*! * Build the index buffer. - * TODO: this de-strips the strips... */ void Generic2::build_index_buffer() { for (u32 bucket_idx = 0; bucket_idx < m_next_free_bucket; bucket_idx++) { auto& bucket = m_buckets[bucket_idx]; + bucket.tri_count = 0; bucket.idx_idx = m_next_free_idx; u32 adgif_idx = bucket.start; while (adgif_idx != UINT32_MAX) { auto& adgif = m_adgifs[adgif_idx]; - - u32 warmup = 0; + m_indices[m_next_free_idx++] = UINT32_MAX; for (u32 vidx = adgif.vtx_idx; vidx < adgif.vtx_idx + adgif.vtx_count; vidx++) { auto& vtx = m_verts[vidx]; - warmup++; - if (vtx.adc && warmup >= 3) { + if (vtx.adc) { m_indices[m_next_free_idx++] = vidx; + bucket.tri_count++; + } else { + m_indices[m_next_free_idx++] = UINT32_MAX; m_indices[m_next_free_idx++] = vidx - 1; - m_indices[m_next_free_idx++] = vidx - 2; + m_indices[m_next_free_idx++] = vidx; } } + bucket.tri_count -= 2; adgif_idx = adgif.next; } diff --git a/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp index 816b123597..95a07849ae 100644 --- a/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp +++ b/game/graphics/opengl_renderer/foreground/Generic2_OpenGL.cpp @@ -71,18 +71,20 @@ void 
Generic2::init_shaders(ShaderLibrary& shaders) { m_ogl.scale = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "scale"); m_ogl.mat_23 = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "mat_23"); m_ogl.mat_32 = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "mat_32"); + m_ogl.mat_33 = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "mat_33"); m_ogl.fog_consts = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "fog_constants"); m_ogl.hvdf_offset = glGetUniformLocation(shaders[ShaderId::GENERIC].id(), "hvdf_offset"); } -void Generic2::opengl_bind(SharedRenderState* render_state) { +void Generic2::opengl_bind_and_setup_proj(SharedRenderState* render_state) { render_state->shaders[ShaderId::GENERIC].activate(); glUniform4f(m_ogl.fog_color, render_state->fog_color[0], render_state->fog_color[1], render_state->fog_color[2], render_state->fog_intensity); - glUniform4f(m_ogl.scale, m_drawing_config.scale[0], m_drawing_config.scale[1], - m_drawing_config.scale[2], 0); - glUniform1f(m_ogl.mat_23, m_drawing_config.mat_23); - glUniform1f(m_ogl.mat_32, m_drawing_config.mat_32); + glUniform4f(m_ogl.scale, m_drawing_config.proj_scale[0], m_drawing_config.proj_scale[1], + m_drawing_config.proj_scale[2], 0); + glUniform1f(m_ogl.mat_23, m_drawing_config.proj_mat_23); + glUniform1f(m_ogl.mat_32, m_drawing_config.proj_mat_32); + glUniform1f(m_ogl.mat_33, 0); glUniform3f(m_ogl.fog_consts, m_drawing_config.pfog0, m_drawing_config.fog_min, m_drawing_config.fog_max); glUniform4f(m_ogl.hvdf_offset, m_drawing_config.hvdf_offset[0], m_drawing_config.hvdf_offset[1], @@ -247,18 +249,19 @@ void Generic2::setup_opengl_tex(u16 unit, void Generic2::do_draws_for_alpha(SharedRenderState* render_state, ScopedProfilerNode& prof, - DrawMode::AlphaBlend alpha) { + DrawMode::AlphaBlend alpha, + bool hud) { for (u32 i = 0; i < m_next_free_bucket; i++) { auto& bucket = m_buckets[i]; auto& first = m_adgifs[bucket.start]; - if (first.mode.get_alpha_blend() == alpha) { + if 
(first.mode.get_alpha_blend() == alpha && first.uses_hud == hud) { setup_opengl_for_draw_mode(first.mode, first.fix, render_state); setup_opengl_tex(0, first.tbp, first.mode.get_filt_enable(), first.mode.get_clamp_s_enable(), first.mode.get_clamp_t_enable(), render_state); - glDrawElements(GL_TRIANGLES, bucket.idx_count, GL_UNSIGNED_INT, + glDrawElements(GL_TRIANGLE_STRIP, bucket.idx_count, GL_UNSIGNED_INT, (void*)(sizeof(u32) * bucket.idx_idx)); prof.add_draw_call(); - prof.add_tri(bucket.idx_count / 3); + prof.add_tri(bucket.tri_count); } } } @@ -274,7 +277,7 @@ void Generic2::do_draws(SharedRenderState* render_state, ScopedProfilerNode& pro glEnable(GL_PRIMITIVE_RESTART); glPrimitiveRestartIndex(UINT32_MAX); - opengl_bind(render_state); + opengl_bind_and_setup_proj(render_state); constexpr DrawMode::AlphaBlend alpha_order[ALPHA_MODE_COUNT] = { DrawMode::AlphaBlend::SRC_SRC_SRC_SRC, DrawMode::AlphaBlend::SRC_DST_SRC_DST, DrawMode::AlphaBlend::SRC_0_SRC_DST, DrawMode::AlphaBlend::ZERO_SRC_SRC_DST, @@ -283,7 +286,21 @@ void Generic2::do_draws(SharedRenderState* render_state, ScopedProfilerNode& pro for (int i = 0; i < ALPHA_MODE_COUNT; i++) { if (m_alpha_draw_enable[i]) { - do_draws_for_alpha(render_state, prof, alpha_order[i]); + do_draws_for_alpha(render_state, prof, alpha_order[i], false); + } + } + + if (m_drawing_config.uses_hud) { + glUniform4f(m_ogl.scale, m_drawing_config.hud_scale[0], m_drawing_config.hud_scale[1], + m_drawing_config.hud_scale[2], 0); + glUniform1f(m_ogl.mat_23, m_drawing_config.hud_mat_23); + glUniform1f(m_ogl.mat_32, m_drawing_config.hud_mat_32); + glUniform1f(m_ogl.mat_33, m_drawing_config.hud_mat_33); + + for (int i = 0; i < ALPHA_MODE_COUNT; i++) { + if (m_alpha_draw_enable[i]) { + do_draws_for_alpha(render_state, prof, alpha_order[i], true); + } } } } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/shaders/generic.vert b/game/graphics/opengl_renderer/shaders/generic.vert index e385624dbe..b2f5dfa8fd 100644 --- 
a/game/graphics/opengl_renderer/shaders/generic.vert +++ b/game/graphics/opengl_renderer/shaders/generic.vert @@ -9,6 +9,7 @@ uniform float mat_32; uniform vec3 fog_constants; uniform vec4 scale; uniform float mat_23; +uniform float mat_33; uniform vec4 hvdf_offset; out vec2 tex_coord; @@ -36,7 +37,7 @@ void main() { // vu.acc.madda(Mask::xyzw, gen.mat0, gen.vtx_load0.x()); transformed.xyz = position_in * scale.xyz; transformed.z += mat_32; - transformed.w = mat_23 * position_in.z; + transformed.w = mat_23 * position_in.z + mat_33; transformed *= -1; // todo? From 223673afbc8ac831de5ed65d7696476f130588b1 Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 17:59:18 -0500 Subject: [PATCH 09/12] windows fix --- common/util/Timer.cpp | 2 +- common/util/Timer.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/util/Timer.cpp b/common/util/Timer.cpp index 9aa870baf0..1195f29906 100644 --- a/common/util/Timer.cpp +++ b/common/util/Timer.cpp @@ -11,7 +11,7 @@ #define NS_PER_HNS (100ULL) // NS = nanoseconds #define NS_PER_SEC (MS_PER_SEC * US_PER_MS * NS_PER_US) -int Timer::clock_gettime_monotonic(struct timespec* tv) { +int Timer::clock_gettime_monotonic(struct timespec* tv) const { static LARGE_INTEGER ticksPerSec; LARGE_INTEGER ticks; double seconds; diff --git a/common/util/Timer.h b/common/util/Timer.h index 074e6ec906..762ed923a3 100644 --- a/common/util/Timer.h +++ b/common/util/Timer.h @@ -15,7 +15,7 @@ class Timer { explicit Timer() { start(); } #ifdef _WIN32 - int clock_gettime_monotonic(struct timespec* tv); + int clock_gettime_monotonic(struct timespec* tv) const; #endif /*! 
From a2307aeaaf2d73e4cf70ea745c36e42f858c578f Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 18:54:33 -0500 Subject: [PATCH 10/12] final tweaks --- .../graphics/opengl_renderer/BucketRenderer.h | 67 +------------- .../opengl_renderer/OpenGLRenderer.cpp | 20 +++-- .../graphics/opengl_renderer/OpenGLRenderer.h | 1 + game/graphics/opengl_renderer/Profiler.cpp | 90 +++++++++++++------ game/graphics/opengl_renderer/Profiler.h | 29 +++++- game/graphics/opengl_renderer/buckets.h | 68 ++++++++++++++ game/graphics/opengl_renderer/debug_gui.cpp | 5 +- game/graphics/pipelines/opengl.cpp | 4 +- 8 files changed, 178 insertions(+), 106 deletions(-) create mode 100644 game/graphics/opengl_renderer/buckets.h diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 4d030071de..c73cc00fe8 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -7,72 +7,7 @@ #include "game/graphics/texture/TexturePool.h" #include "game/graphics/opengl_renderer/Profiler.h" #include "game/graphics/opengl_renderer/Loader.h" - -/*! 
- * Matches the bucket-id enum in GOAL - */ -enum class BucketId { - BUCKET0 = 0, - BUCKET1 = 1, - SKY_DRAW = 3, - TFRAG_TEX_LEVEL0 = 5, - TFRAG_LEVEL0 = 6, - TIE_LEVEL0 = 9, - MERC_TFRAG_TEX_LEVEL0 = 10, - GMERC_TFRAG_TEX_LEVEL0 = 11, - TFRAG_TEX_LEVEL1 = 12, - TFRAG_LEVEL1 = 13, - TIE_LEVEL1 = 16, - MERC_TFRAG_TEX_LEVEL1 = 17, - GMERC_TFRAG_TEX_LEVEL1 = 18, - SHRUB_TEX_LEVEL0 = 19, - SHRUB_TEX_LEVEL1 = 25, - GENERIC_SHRUB = 30, - ALPHA_TEX_LEVEL0 = 31, - TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0 = 32, - TFRAG_DIRT_LEVEL0 = 34, - TFRAG_ICE_LEVEL0 = 36, - ALPHA_TEX_LEVEL1 = 38, - TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1 = 39, - TFRAG_DIRT_LEVEL1 = 41, - TFRAG_ICE_LEVEL1 = 43, - MERC_AFTER_ALPHA = 45, - GENERIC_ALPHA = 46, - PRIS_TEX_LEVEL0 = 48, - MERC_PRIS_LEVEL0 = 49, - GENERIC_PRIS_LEVEL0 = 50, - PRIS_TEX_LEVEL1 = 51, - MERC_PRIS_LEVEL1 = 52, - GENERIC_PRIS_LEVEL1 = 53, - MERC_EYES_AFTER_PRIS = 54, - MERC_AFTER_PRIS = 55, - GENERIC_PRIS = 56, - WATER_TEX_LEVEL0 = 57, - MERC_WATER_LEVEL0 = 58, - GENERIC_WATER_LEVEL0 = 59, - WATER_TEX_LEVEL1 = 60, - MERC_WATER_LEVEL1 = 61, - GENERIC_WATER_LEVEL1 = 62, - // ... - PRE_SPRITE_TEX = 65, // maybe it's just common textures? - SPRITE = 66, - DEBUG_DRAW_0 = 67, - DEBUG_DRAW_1 = 68, - MAX_BUCKETS = 69 -}; - -enum class BucketCategory { - SKY, - TFRAG, - TIE, - TEX, - MERC, - GENERIC_MERC, - SPRITE, - DEBUG_DRAW, - UNUSED, - MAX_CATEGORIES -}; +#include "game/graphics/opengl_renderer/buckets.h" struct LevelVis { bool valid = false; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 5a6c9c35f4..0c5fc66018 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -69,7 +69,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr texture_pool, * Construct bucket renderers. 
We can specify different renderers for different buckets */ void OpenGLRenderer::init_bucket_renderers() { - m_bucket_categories.fill(BucketCategory::UNUSED); + m_bucket_categories.fill(BucketCategory::OTHER); std::vector normal_tfrags = {tfrag3::TFragmentTreeKind::NORMAL, tfrag3::TFragmentTreeKind::LOWRES}; std::vector dirt_tfrags = {tfrag3::TFragmentTreeKind::DIRT}; @@ -83,7 +83,7 @@ void OpenGLRenderer::init_bucket_renderers() { // 0 // 1 // 2 - init_bucket_renderer("sky", BucketCategory::SKY, BucketId::SKY_DRAW); // 3 + init_bucket_renderer("sky", BucketCategory::OTHER, BucketId::SKY_DRAW); // 3 // 4 //----------------------- @@ -147,7 +147,7 @@ void OpenGLRenderer::init_bucket_renderers() { //----------------------- init_bucket_renderer("l0-alpha-tex", BucketCategory::TEX, BucketId::ALPHA_TEX_LEVEL0); // 31 - init_bucket_renderer("l0-alpha-sky-blend-and-tfrag-trans", BucketCategory::SKY, + init_bucket_renderer("l0-alpha-sky-blend-and-tfrag-trans", BucketCategory::OTHER, BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0, 0, sky_gpu_blender, sky_cpu_blender); // 32 // 33 @@ -164,7 +164,7 @@ void OpenGLRenderer::init_bucket_renderers() { //----------------------- init_bucket_renderer("l1-alpha-tex", BucketCategory::TEX, BucketId::ALPHA_TEX_LEVEL1); // 38 - init_bucket_renderer("l1-alpha-sky-blend-and-tfrag-trans", BucketCategory::SKY, + init_bucket_renderer("l1-alpha-sky-blend-and-tfrag-trans", BucketCategory::OTHER, BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1, 1, sky_gpu_blender, sky_cpu_blender); // 39 // 40 @@ -205,7 +205,7 @@ void OpenGLRenderer::init_bucket_renderers() { // other renderers may output to the eye renderer m_render_state.eye_renderer = init_bucket_renderer( - "common-pris-eyes", BucketCategory::SKY, BucketId::MERC_EYES_AFTER_PRIS); // 54 + "common-pris-eyes", BucketCategory::OTHER, BucketId::MERC_EYES_AFTER_PRIS); // 54 init_bucket_renderer("common-pris-merc", BucketCategory::MERC, BucketId::MERC_AFTER_PRIS); // 55 
init_bucket_renderer("common-pris-generic", BucketCategory::GENERIC_MERC, @@ -255,7 +255,7 @@ void OpenGLRenderer::init_bucket_renderers() { // for now, for any unset renderers, just set them to an EmptyBucketRenderer. for (size_t i = 0; i < m_bucket_renderers.size(); i++) { if (!m_bucket_renderers[i]) { - init_bucket_renderer(fmt::format("bucket{}", i), BucketCategory::UNUSED, + init_bucket_renderer(fmt::format("bucket{}", i), BucketCategory::OTHER, (BucketId)i); } @@ -313,7 +313,13 @@ void OpenGLRenderer::render(DmaFollower dma, const RenderOptions& settings) { // } if (settings.draw_small_profiler_window) { - m_profiler.draw_small_window(m_render_state.load_status_debug); + SmallProfilerStats stats; + stats.draw_calls = m_profiler.root()->stats().draw_calls; + stats.triangles = m_profiler.root()->stats().triangles; + for (int i = 0; i < (int)BucketCategory::MAX_CATEGORIES; i++) { + stats.time_per_category[i] = m_category_times[i]; + } + m_small_profiler.draw(m_render_state.load_status_debug, stats); } if (settings.save_screenshot) { diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.h b/game/graphics/opengl_renderer/OpenGLRenderer.h index ad0f624d21..f084afd0f1 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.h +++ b/game/graphics/opengl_renderer/OpenGLRenderer.h @@ -49,6 +49,7 @@ class OpenGLRenderer { SharedRenderState m_render_state; Profiler m_profiler; + SmallProfiler m_small_profiler; std::array, (int)BucketId::MAX_BUCKETS> m_bucket_renderers; std::array m_bucket_categories; diff --git a/game/graphics/opengl_renderer/Profiler.cpp b/game/graphics/opengl_renderer/Profiler.cpp index 62a368a0c1..2f99eb480c 100644 --- a/game/graphics/opengl_renderer/Profiler.cpp +++ b/game/graphics/opengl_renderer/Profiler.cpp @@ -88,34 +88,6 @@ void Profiler::draw() { ImGui::End(); } -void Profiler::draw_small_window(const std::string& status) { - ImGuiWindowFlags window_flags = ImGuiWindowFlags_NoDecoration | - ImGuiWindowFlags_AlwaysAutoResize | - 
ImGuiWindowFlags_NoSavedSettings | - ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav; - auto* p_open = &m_small_window_open; - const float PAD = 10.0f; - const ImGuiViewport* viewport = ImGui::GetMainViewport(); - ImVec2 work_pos = viewport->WorkPos; // Use work area to avoid menu-bar/task-bar, if any! - ImVec2 work_size = viewport->WorkSize; - ImVec2 window_pos, window_pos_pivot; - window_pos.x = (work_pos.x + PAD); - window_pos.y = (work_pos.y + work_size.y - PAD); - window_pos_pivot.x = 0.0f; - window_pos_pivot.y = 1.0f; - ImGui::SetNextWindowPos(window_pos, ImGuiCond_Always, window_pos_pivot); - - ImGui::SetNextWindowBgAlpha(0.85f); // Transparent background - if (ImGui::Begin("Profiler (short)", p_open, window_flags)) { - ImGui::Text(" tri: %7d\n", m_root.m_stats.triangles); - ImGui::Text(" DC: %4d\n", m_root.m_stats.draw_calls); - if (!status.empty()) { - ImGui::Text("%s", status.c_str()); - } - } - ImGui::End(); -} - u32 name_to_color(const std::string& name) { u64 val = std::hash{}(name); return colors::common_colors[val % colors::COLOR_COUNT] | 0xff000000; @@ -185,4 +157,66 @@ void ProfilerNode::to_string_helper(std::string& str, int depth) const { for (const auto& child : m_children) { child.to_string_helper(str, depth + 1); } +} + +void FramePlot::push(float val) { + m_buffer[m_idx++] = val; + if (m_idx == SIZE) { + m_idx = 0; + } +} + +void FramePlot::draw(float max) { + float worst = 0, total = 0; + for (auto x : m_buffer) { + worst = std::max(x, worst); + total += x; + } + ImGui::SameLine(); + ImGui::Text("avg: %.1f", total / SIZE); + + ImGui::SameLine(); + ImGui::Text("worst: %.1f", worst); + + ImGui::Separator(); + ImGui::PlotLines( + "", + [](void* data, int idx) { + auto* me = (FramePlot*)data; + return me->m_buffer[(me->m_idx + idx) % SIZE]; + }, + (void*)this, SIZE, 0, nullptr, 0, max, ImVec2(300, 40)); +} + +void SmallProfiler::draw(const std::string& status, const SmallProfilerStats& stats) { + ImGuiWindowFlags window_flags = 
ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav; + const float PAD = 10.0f; + const ImGuiViewport* viewport = ImGui::GetMainViewport(); + ImVec2 work_pos = viewport->WorkPos; // Use work area to avoid menu-bar/task-bar, if any! + ImVec2 work_size = viewport->WorkSize; + ImVec2 window_pos, window_pos_pivot; + window_pos.x = (work_pos.x + PAD); + window_pos.y = (work_pos.y + work_size.y - PAD); + window_pos_pivot.x = 0.0f; + window_pos_pivot.y = 1.0f; + ImGui::SetNextWindowPos(window_pos, ImGuiCond_Always, window_pos_pivot); + + ImGui::SetNextWindowBgAlpha(0.85f); // Transparent background + if (ImGui::Begin("Profiler (short)", nullptr, window_flags)) { + ImGui::Text(" tri: %7d\n", stats.triangles); + ImGui::Text(" DC: %4d\n", stats.draw_calls); + if (!status.empty()) { + ImGui::Text("%s", status.c_str()); + } + + for (int i = 0; i < (int)BucketCategory::MAX_CATEGORIES; i++) { + m_plots[i].push(stats.time_per_category[i] * 1000.f); + ImGui::Text("%6s", BUCKET_CATEGORY_NAMES[i]); + m_plots[i].draw(5.f); + } + } + ImGui::End(); } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/Profiler.h b/game/graphics/opengl_renderer/Profiler.h index df8af9a1e0..fc397fbca8 100644 --- a/game/graphics/opengl_renderer/Profiler.h +++ b/game/graphics/opengl_renderer/Profiler.h @@ -6,6 +6,8 @@ #include "common/common_types.h" #include "common/util/Timer.h" +#include "game/graphics/opengl_renderer/buckets.h" + enum class ProfilerSort { NONE = 0, TIME = 1, DRAW_CALLS = 2, TRIANGLES = 3 }; struct ProfilerStats { @@ -35,6 +37,7 @@ class ProfilerNode { void add_draw_call(int count = 1) { m_stats.draw_calls += count; } void add_tri(int count = 1) { m_stats.triangles += count; } float get_elapsed_time() const { return m_timer.getSeconds(); } + const ProfilerStats& stats() const { return m_stats; } private: friend class Profiler; @@ -71,7 +74,6 @@ class 
Profiler { Profiler(); void clear(); void draw(); - void draw_small_window(const std::string& status); void finish(); float root_time() const { return m_root.m_stats.duration; } @@ -88,6 +90,29 @@ class Profiler { }; int m_mode_selector = 0; - bool m_small_window_open = true; ProfilerNode m_root; +}; + +class FramePlot { + public: + void push(float val); + void draw(float max); + + private: + static constexpr int SIZE = 60 * 5; + float m_buffer[SIZE] = {}; + int m_idx = 0; +}; + +struct SmallProfilerStats { + int triangles, draw_calls; + float time_per_category[(int)BucketCategory::MAX_CATEGORIES]; +}; + +class SmallProfiler { + public: + void draw(const std::string& load_status, const SmallProfilerStats& stats); + + private: + std::array m_plots; }; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/buckets.h b/game/graphics/opengl_renderer/buckets.h new file mode 100644 index 0000000000..74fad1e758 --- /dev/null +++ b/game/graphics/opengl_renderer/buckets.h @@ -0,0 +1,68 @@ +#pragma once +/*! 
+ * Matches the bucket-id enum in GOAL + */ +enum class BucketId { + BUCKET0 = 0, + BUCKET1 = 1, + SKY_DRAW = 3, + TFRAG_TEX_LEVEL0 = 5, + TFRAG_LEVEL0 = 6, + TIE_LEVEL0 = 9, + MERC_TFRAG_TEX_LEVEL0 = 10, + GMERC_TFRAG_TEX_LEVEL0 = 11, + TFRAG_TEX_LEVEL1 = 12, + TFRAG_LEVEL1 = 13, + TIE_LEVEL1 = 16, + MERC_TFRAG_TEX_LEVEL1 = 17, + GMERC_TFRAG_TEX_LEVEL1 = 18, + SHRUB_TEX_LEVEL0 = 19, + SHRUB_TEX_LEVEL1 = 25, + GENERIC_SHRUB = 30, + ALPHA_TEX_LEVEL0 = 31, + TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0 = 32, + TFRAG_DIRT_LEVEL0 = 34, + TFRAG_ICE_LEVEL0 = 36, + ALPHA_TEX_LEVEL1 = 38, + TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1 = 39, + TFRAG_DIRT_LEVEL1 = 41, + TFRAG_ICE_LEVEL1 = 43, + MERC_AFTER_ALPHA = 45, + GENERIC_ALPHA = 46, + PRIS_TEX_LEVEL0 = 48, + MERC_PRIS_LEVEL0 = 49, + GENERIC_PRIS_LEVEL0 = 50, + PRIS_TEX_LEVEL1 = 51, + MERC_PRIS_LEVEL1 = 52, + GENERIC_PRIS_LEVEL1 = 53, + MERC_EYES_AFTER_PRIS = 54, + MERC_AFTER_PRIS = 55, + GENERIC_PRIS = 56, + WATER_TEX_LEVEL0 = 57, + MERC_WATER_LEVEL0 = 58, + GENERIC_WATER_LEVEL0 = 59, + WATER_TEX_LEVEL1 = 60, + MERC_WATER_LEVEL1 = 61, + GENERIC_WATER_LEVEL1 = 62, + // ... + PRE_SPRITE_TEX = 65, // maybe it's just common textures? 
+ SPRITE = 66, + DEBUG_DRAW_0 = 67, + DEBUG_DRAW_1 = 68, + MAX_BUCKETS = 69 +}; + +enum class BucketCategory { + TFRAG, + TIE, + TEX, + MERC, + GENERIC_MERC, + SPRITE, + DEBUG_DRAW, + OTHER, + MAX_CATEGORIES +}; + +constexpr const char* BUCKET_CATEGORY_NAMES[(int)BucketCategory::MAX_CATEGORIES] = { + "tfrag", "tie", "tex", "merc", "mercneric", "sprite", "debug", "other"}; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/debug_gui.cpp b/game/graphics/opengl_renderer/debug_gui.cpp index 902c807d2b..b7965a1703 100644 --- a/game/graphics/opengl_renderer/debug_gui.cpp +++ b/game/graphics/opengl_renderer/debug_gui.cpp @@ -37,8 +37,9 @@ void FrameTimeRecorder::draw_window(const DmaStats& dma_stats) { ImGui::SetNextWindowBgAlpha(0.85f); // Transparent background if (ImGui::Begin("Frame Timing", p_open, window_flags)) { - ImGui::Text("DMA: sync ms %.1f, tc %4d, sz %3d KB, ch %d", dma_stats.sync_time_ms, - dma_stats.num_tags, (dma_stats.num_data_bytes) / (1 << 10), dma_stats.num_chunks); + // ImGui::Text("DMA: sync ms %.1f, tc %4d, sz %3d KB, ch %d", dma_stats.sync_time_ms, + // dma_stats.num_tags, (dma_stats.num_data_bytes) / (1 << 10), + // dma_stats.num_chunks); float worst = 0, total = 0; for (auto x : m_frame_times) { worst = std::max(x, worst); diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp index 17aa25ff3e..9297f53743 100644 --- a/game/graphics/pipelines/opengl.cpp +++ b/game/graphics/pipelines/opengl.cpp @@ -355,7 +355,9 @@ static void gl_render_display(GfxDisplay* display) { #endif // render game! 
- render_game_frame(width, height, lbox_w, lbox_h); + if (g_gfx_data->debug_gui.should_advance_frame()) { + render_game_frame(width, height, lbox_w, lbox_h); + } if (g_gfx_data->debug_gui.should_gl_finish()) { glFinish(); From 634b58dd4e2fe529ca362752e2cf8eae7d877b11 Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 19:07:37 -0500 Subject: [PATCH 11/12] memcard folder --- game/kernel/kmemcard.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/game/kernel/kmemcard.cpp b/game/kernel/kmemcard.cpp index 06e9c63738..e89a8b3b4c 100644 --- a/game/kernel/kmemcard.cpp +++ b/game/kernel/kmemcard.cpp @@ -253,6 +253,7 @@ void pc_game_save_synch() { Timer mc_timer; mc_timer.start(); pc_update_card(); + file_util::create_dir_if_needed(file_util::get_file_path({"user", "memcard", filename[0]})); // cd_reprobe_save // if (!file_is_present(op.param2)) { From 392b1065b19d72a04bed52f276bd0cea2e7f5f1a Mon Sep 17 00:00:00 2001 From: water Date: Sun, 6 Mar 2022 19:23:53 -0500 Subject: [PATCH 12/12] add missing include --- game/graphics/opengl_renderer/Profiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/game/graphics/opengl_renderer/Profiler.h b/game/graphics/opengl_renderer/Profiler.h index fc397fbca8..eade3f690d 100644 --- a/game/graphics/opengl_renderer/Profiler.h +++ b/game/graphics/opengl_renderer/Profiler.h @@ -2,6 +2,7 @@ #include #include +#include #include "common/common_types.h" #include "common/util/Timer.h"