Skip to content

Commit

Permalink
GPU: Assume vertex commands are 8-byte aligned
Browse files (browse the repository at this point in the history)
  • Loading branch information
stenzek committed Dec 28, 2024
1 parent 8c80711 commit 799f5bd
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
28 changes: 14 additions & 14 deletions src/core/gpu_hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2480,9 +2480,9 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)

for (u32 i = 0; i < num_vertices; i += 2)
{
const GSVector2i start_pos = GSVector2i::load<false>(&cmd->vertices[i].x);
const GSVector2i start_pos = GSVector2i::load<true>(&cmd->vertices[i].x);
const u32 start_color = cmd->vertices[i].color;
const GSVector2i end_pos = GSVector2i::load<false>(&cmd->vertices[i + 1].x);
const GSVector2i end_pos = GSVector2i::load<true>(&cmd->vertices[i + 1].x);
const u32 end_color = cmd->vertices[i + 1].color;

const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos);
Expand Down Expand Up @@ -2524,9 +2524,9 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)

for (u32 i = 0; i < num_vertices; i += 2)
{
const GSVector2 start_pos = GSVector2::load<false>(&cmd->vertices[i].x);
const GSVector2 start_pos = GSVector2::load<true>(&cmd->vertices[i].x);
const u32 start_color = cmd->vertices[i].color;
const GSVector2 end_pos = GSVector2::load<false>(&cmd->vertices[i + 1].x);
const GSVector2 end_pos = GSVector2::load<true>(&cmd->vertices[i + 1].x);
const u32 end_color = cmd->vertices[i + 1].color;

const GSVector4 bounds = GSVector4::xyxy(start_pos, end_pos);
Expand Down Expand Up @@ -2765,7 +2765,7 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
for (u32 i = 0; i < num_vertices; i++)
{
const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i];
const GSVector2 vert_pos = GSVector2(GSVector2i::load<false>(&vert.x));
const GSVector2 vert_pos = GSVector2(GSVector2i::load<true>(&vert.x));
vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, raw_texture ? UINT32_C(0x00808080) : vert.color, texpage,
vert.texcoord, 0xFFFF0000u);
}
Expand Down Expand Up @@ -2839,9 +2839,9 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
GSVector4i& clamped_draw_rect_012,
GSVector4i& clamped_draw_rect_123)
{
GSVector2 v0f = GSVector2::load<false>(&vertices[0].x);
GSVector2 v1f = GSVector2::load<false>(&vertices[1].x);
GSVector2 v2f = GSVector2::load<false>(&vertices[2].x);
GSVector2 v0f = GSVector2::load<true>(&vertices[0].x);
GSVector2 v1f = GSVector2::load<true>(&vertices[1].x);
GSVector2 v2f = GSVector2::load<true>(&vertices[2].x);
GSVector2 min_pos_12 = v1f.min(v2f);
GSVector2 max_pos_12 = v1f.max(v2f);
GSVector4i draw_rect_012 =
Expand All @@ -2867,7 +2867,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
};

const GSVector4 tv01f = truncate_pos(GSVector4::xyxy(v0f, v1f));
const GSVector4 tv23f = truncate_pos(GSVector4::xyxy(v2f, GSVector2::load<false>(&vertices[3].x)));
const GSVector4 tv23f = truncate_pos(GSVector4::xyxy(v2f, GSVector2::load<true>(&vertices[3].x)));
const GSVector2 tv0f = tv01f.xy();
const GSVector2 tv1f = tv01f.zw();
const GSVector2 tv2f = tv23f.xy();
Expand All @@ -2881,11 +2881,11 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
!tdraw_rect_012.rintersects(m_clamped_drawing_area));
if (!first_tri_culled)
{
GSVector4::storel<false>(&vertices[0].x, tv01f);
GSVector4::storeh<false>(&vertices[1].x, tv01f);
GSVector4::storel<false>(&vertices[2].x, tv23f);
GSVector4::storel<true>(&vertices[0].x, tv01f);
GSVector4::storeh<true>(&vertices[1].x, tv01f);
GSVector4::storel<true>(&vertices[2].x, tv23f);
if (num_vertices == 4)
GSVector4::storeh<false>(&vertices[3].x, tv23f);
GSVector4::storeh<true>(&vertices[3].x, tv23f);

GL_INS_FMT("Adjusted polygon from [{} {} {}] to [{} {} {}] due to coordinate truncation", v0f, v1f, v2f, tv0f,
tv1f, tv2f);
Expand All @@ -2908,7 +2908,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*

if (num_vertices == 4)
{
const GSVector2 v3f = GSVector2::load<false>(&vertices[3].x);
const GSVector2 v3f = GSVector2::load<true>(&vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area);
Expand Down
2 changes: 1 addition & 1 deletion src/core/gpu_hw.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class GPU_HW final : public GPUBackend

static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));

struct BatchVertex
struct alignas(16) BatchVertex
{
float x;
float y;
Expand Down

0 comments on commit 799f5bd

Please sign in to comment.