Skip to content

Commit

Permalink
GPU: Move vertex culling to GPU thread
Browse files Browse the repository at this point in the history
i.e. push all primitives through to the GPU thread unless they are
oversized, since the GPU will definitely skip those.

Needed because of coordinate truncation in Final Fantasy VIII; the affected
scenes will now render correctly with the software renderer again.
  • Loading branch information
stenzek committed Dec 28, 2024
1 parent 58b0ccf commit c99625e
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 96 deletions.
7 changes: 5 additions & 2 deletions src/core/gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,10 @@ class GPU final

AddCommandTicks(pixels);
}
ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i clamped_rect, bool textured, bool semitransparent)
ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent)
{
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);

u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();

Expand Down Expand Up @@ -373,8 +375,9 @@ class GPU final

AddCommandTicks(ticks_per_row * drawn_height);
}
ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i clamped_rect, bool shaded)
ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i rect, bool shaded)
{
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();

Expand Down
4 changes: 3 additions & 1 deletion src/core/gpu_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ GPUBackend::~GPUBackend()

bool GPUBackend::Initialize(bool clear_vram, Error* error)
{
m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area);

if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error))
return false;

Expand Down Expand Up @@ -451,9 +453,9 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd)

case GPUBackendCommandType::SetDrawingArea:
{
FlushRender();
const GPUBackendSetDrawingAreaCommand* ccmd = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd);
GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area;
m_clamped_drawing_area = GPU::GetClampedDrawingArea(ccmd->new_area);
DrawingAreaChanged();
}
break;
Expand Down
2 changes: 2 additions & 0 deletions src/core/gpu_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ class GPUBackend
void DestroyDeinterlaceTextures();
bool ApplyChromaSmoothing();

GSVector4i m_clamped_drawing_area = {};

s32 m_display_width = 0;
s32 m_display_height = 0;
s32 m_display_origin_left = 0;
Expand Down
75 changes: 28 additions & 47 deletions src/core/gpu_commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,22 +428,20 @@ bool GPU::HandleRenderPolygonCommand()
}

// Cull polygons which are too large.
const GSVector2 v0f = GSVector2::load<false>(&cmd->vertices[0].x);
const GSVector2 v1f = GSVector2::load<false>(&cmd->vertices[1].x);
const GSVector2 v2f = GSVector2::load<false>(&cmd->vertices[2].x);
const GSVector2 min_pos_12 = v1f.min(v2f);
const GSVector2 max_pos_12 = v1f.max(v2f);
const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector2i v0 = GSVector2i::load<false>(&cmd->vertices[0].native_x);
const GSVector2i v1 = GSVector2i::load<false>(&cmd->vertices[1].native_x);
const GSVector2i v2 = GSVector2i::load<false>(&cmd->vertices[2].native_x);
const GSVector2i min_pos_12 = v1.min_s32(v2);
const GSVector2i max_pos_12 = v1.max_s32(v2);
const GSVector4i draw_rect_012 =
GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const bool first_tri_culled =
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_012.rintersects(m_clamped_drawing_area));
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT);
if (first_tri_culled)
{
// TODO: GPU events... somehow.
DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x,
cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y,
cmd->vertices[2].native_x, cmd->vertices[2].native_y);
DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, cmd->vertices[0].native_y,
cmd->vertices[1].native_x, cmd->vertices[1].native_y, cmd->vertices[2].native_x,
cmd->vertices[2].native_y);

if (!rc.quad_polygon)
{
Expand All @@ -462,19 +460,19 @@ bool GPU::HandleRenderPolygonCommand()
// quads
if (rc.quad_polygon)
{
const GSVector2 v3f = GSVector2::load<false>(&cmd->vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
const GSVector2i v3 = GSVector2i::load<false>(&cmd->vertices[3].native_x);
const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3))
.upl64(GSVector4i(max_pos_12.max_s32(v3)))
.add32(GSVector4i::cxpr(0, 0, 1, 1));

// Cull polygons which are too large.
const bool second_tri_culled =
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_123.rintersects(m_clamped_drawing_area));
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT);
if (second_tri_culled)
{
DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}",
cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x,
cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y);
DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].native_x,
cmd->vertices[2].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y,
cmd->vertices[3].native_x, cmd->vertices[3].native_y);

if (first_tri_culled)
{
Expand All @@ -483,9 +481,6 @@ bool GPU::HandleRenderPolygonCommand()
}

// Remove second part of quad.
// NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no
// longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it
// generally looks better this way, so I'm keeping it.
cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) +
3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex));
cmd->num_vertices = 3;
Expand Down Expand Up @@ -540,11 +535,10 @@ bool GPU::HandleRenderPolygonCommand()
const GSVector4i draw_rect_012 =
GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const bool first_tri_culled =
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_012.rintersects(m_clamped_drawing_area));
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT);
if (first_tri_culled)
{
DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);

if (!rc.quad_polygon)
Expand All @@ -568,12 +562,11 @@ bool GPU::HandleRenderPolygonCommand()

// Cull polygons which are too large.
const bool second_tri_culled =
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT ||
!draw_rect_123.rintersects(m_clamped_drawing_area));
(draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT);
if (second_tri_culled)
{
DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x,
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y);
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[3].x, cmd->vertices[3].y);

if (first_tri_culled)
{
Expand Down Expand Up @@ -681,15 +674,7 @@ bool GPU::HandleRenderRectangleCommand()
}

const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height);
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
if (clamped_rect.rempty()) [[unlikely]]
{
DEBUG_LOG("Culling off-screen rectangle {}", rect);
EndCommand();
return true;
}

AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable);
AddDrawRectangleTicks(rect, rc.texture_enable, rc.transparency_enable);

GPUBackend::PushCommand(cmd);
EndCommand();
Expand Down Expand Up @@ -883,15 +868,13 @@ void GPU::FinishPolyline()
const GSVector2 end_pos = GSVector2::load<false>(&end.x);
const GSVector4i rect =
GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);

if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT)
{
DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
}
else
{
AddDrawLineTicks(clamped_rect, m_render_command.shading_enable);
AddDrawLineTicks(rect, m_render_command.shading_enable);

cmd->vertices[out_vertex_count++] = start;
cmd->vertices[out_vertex_count++] = end;
Expand Down Expand Up @@ -930,15 +913,13 @@ void GPU::FinishPolyline()

const GSVector4i rect =
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);

if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT)
{
DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y);
}
else
{
AddDrawLineTicks(clamped_rect, m_render_command.shading_enable);
AddDrawLineTicks(rect, m_render_command.shading_enable);

GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count];
out_vertex_count += 2;
Expand Down
Loading

0 comments on commit c99625e

Please sign in to comment.