Skip to content

Commit

Permalink
Implement guard band culling on all four backends.
Browse files Browse the repository at this point in the history
Assumes the viewport is centered in the 4096x4096 rectangle to save a
uniform.

Ignores the bottom for now, can't figure out TOCA :(
  • Loading branch information
hrydgard committed Apr 2, 2017
1 parent a45857e commit 6998732
Show file tree
Hide file tree
Showing 15 changed files with 147 additions and 44 deletions.
1 change: 1 addition & 0 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ bool IsColorTestTriviallyTrue() {
}
}

// TODO: Pack into 16-bit integer.
const bool nonAlphaSrcFactors[16] = {
true, // GE_SRCBLEND_DSTCOLOR,
true, // GE_SRCBLEND_INVDSTCOLOR,
Expand Down
2 changes: 2 additions & 0 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ enum : uint64_t {
DIRTY_TEXCLAMP = 1ULL << 19,
DIRTY_DEPTHRANGE = 1ULL << 19,

DIRTY_GUARDBAND = 1ULL << 20,

DIRTY_WORLDMATRIX = 1ULL << 21,
DIRTY_VIEWMATRIX = 1ULL << 22,
DIRTY_TEXMATRIX = 1ULL << 23,
Expand Down
31 changes: 29 additions & 2 deletions GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,28 @@ static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {
in.translateAndScale(trans, scale);
}

// Fills gb with the guard band parameters that the vertex shaders read as u_guardband.
//
//   gb[0], gb[1]: 2048 divided by half of the absolute viewport width / height
//                 (taken from gstate_c); compared against the projected x / y.
//   gb[2]:        zmin, passed through unchanged; compared against outPos.w.
//   gb[3]:        NaN, which the shaders write into outPos.w of rejected vertices.
void ComputeGuardband(float gb[4], float zmin) {
	float vpWidth = fabsf(gstate_c.vpWidth);
	float vpHeight = fabsf(gstate_c.vpHeight);
	// Avoid bad values (a division by zero below) during initialization.
	// Doubt these are really needed.
	if (vpWidth == 0.0f)
		vpWidth = 480.0f;
	if (vpHeight == 0.0f)
		vpHeight = 272.0f;

	// We assume a symmetric guardband, even though it's not entirely correct to do so - but nearly
	// everything does it this way, and it leaves room for the NaN in the fourth component.
	// We also assume that everything behind the near clipping plane gets clipped and will thus not
	// in reality exceed the guardband. This is a bit rough but should be ok.
	gb[0] = 2048.0f / (vpWidth * 0.5f);
	gb[1] = 2048.0f / (vpHeight * 0.5f);
	gb[2] = zmin;
	gb[3] = NAN;
}

void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) {
if (dirtyUniforms & DIRTY_TEXENV) {
Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
Expand All @@ -34,7 +56,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
}
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4_Alpha(ub->fogColor, gstate.fogcolor, NAN);
Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
Expand All @@ -56,7 +78,11 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
}

if (dirtyUniforms & DIRTY_GUARDBAND) {
float gb[4];
ComputeGuardband(gb, 0.0f);
memcpy(ub->guardband, gb, sizeof(float) * 4);
}
if (dirtyUniforms & DIRTY_PROJMATRIX) {
Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
Expand Down Expand Up @@ -98,6 +124,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) {
proj_through = proj_through * g_display_rot_matrix;
}
// proj_through.translateAndScale(Vec3(0, 0, 0), Vec3(1.0f / debugscale, 1.0f / debugscale, 0));
CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
}

Expand Down
13 changes: 10 additions & 3 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | DIRTY_GUARDBAND |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE,
DIRTY_LIGHT_UNIFORMS =
Expand All @@ -34,6 +34,7 @@ struct UB_VS_FS_Base {
int spline_count_v;
int spline_type_u;
int spline_type_v;
float guardband[4];
// Fragment data
float fogColor[4];
float texEnvColor[4];
Expand All @@ -59,7 +60,9 @@ R"( mat4 proj_mtx;
int spline_count_v;
int spline_type_u;
int spline_type_v;
vec3 fogcolor; float nanValue;
vec4 guardband;
// Fragment
vec3 fogcolor;
vec3 texenv;
ivec4 alphacolorref;
ivec4 alphacolormask;
Expand All @@ -84,7 +87,9 @@ R"( float4x4 u_proj;
int u_spline_count_v;
int u_spline_type_u;
int u_spline_type_v;
float3 u_fogcolor; float nanValue;
float4 u_guardband;
// Fragment
float3 u_fogcolor;
float3 u_texenv;
uint4 u_alphacolorref;
uint4 u_alphacolormask;
Expand Down Expand Up @@ -184,3 +189,5 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);

// Shared helper functions
void ComputeGuardband(float gb[4], float zmin);
3 changes: 2 additions & 1 deletion GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,10 +427,11 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
if (rasterIter == rasterCache_.end()) {
D3D11_RASTERIZER_DESC desc{};
desc.CullMode = (D3D11_CULL_MODE)(keys_.raster.cullMode);
// desc.FillMode = gstate.isModeThrough() ? D3D11_FILL_SOLID : D3D11_FILL_WIREFRAME;
desc.FillMode = D3D11_FILL_SOLID;
desc.ScissorEnable = TRUE;
desc.FrontCounterClockwise = TRUE;
desc.DepthClipEnable = TRUE;
desc.DepthClipEnable = TRUE; // FALSE
ASSERT_SUCCESS(device_->CreateRasterizerState(&desc, &rs));
rasterCache_.insert(std::pair<uint32_t, ID3D11RasterizerState *>(keys_.raster.value, rs));
} else {
Expand Down
6 changes: 6 additions & 0 deletions GPU/Directx9/ShaderManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Directx9/ShaderManagerDX9.h"
#include "GPU/Directx9/DrawEngineDX9.h"
#include "GPU/Directx9/FramebufferDX9.h"
Expand Down Expand Up @@ -394,6 +395,11 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
#endif
VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
}
if (dirtyUniforms & DIRTY_GUARDBAND) {
float gb[4];
ComputeGuardband(gb, 0.0f);
VSSetFloatUniform4(CONST_VS_GUARDBAND, gb);
}
// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
if (u_bone != 0) {
Expand Down
46 changes: 32 additions & 14 deletions GPU/Directx9/VertexShaderGeneratorDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,12 @@

namespace DX9 {

// The PSP does not have a proper triangle clipper, but it does have a guard band and can rasterize rather large
// The PSP does not have a proper triangle clipper on the sides. It does have one for the front plane.
// It has a guard band though and can rasterize rather large
// triangles that go outside the viewport. However, there are limits, and it will drop triangles that are very
// large. Some games appear to draw broken geometry, probably game bugs that were never discovered because the PSP
// would drop the geometry, including Parappa The Rapper in an obscure case and Outrun. Try to get rid of those
// triangles by setting the W of one of the vertices to NaN if they are discovered.
const bool guardBandCulling = true;
// Not sure what a good value for this is, it should probably depend on the framebuffer size.
// Let's be conservative.
const float guardBand = 64.0f;

static const char * const boneWeightAttrDecl[9] = {
"#ERROR#",
Expand All @@ -65,6 +62,26 @@ enum DoLightComputation {
LIGHT_FULL,
};

// #define COLORGUARDBAND

#ifdef COLORGUARDBAND
// Debug flavour of the guard band test: nothing is culled. Vertices that fail the x / y
// comparison get the green channel of colorOverride zeroed, and vertices whose w is below
// u_guardband.z get the blue channel zeroed, so the outcome of the test is visible on screen.
// (The culling flavour assigns outPos.w = u_guardband.w where this one clears green.)
static void WriteGuardBand(char *&p) {
	// Emitted as a single chunk; the adjacent literals concatenate to the exact same shader text.
	WRITE(p,
		" float3 projPos = outPos.xyz / outPos.w; \n"
		" if (outPos.w >= u_guardband.z) {\n"
		" if (abs(projPos.x) > u_guardband.x || projPos.y > u_guardband.y) colorOverride.g = 0.0;\n"
		" } else { colorOverride.b = 0.0; } \n");
}
#else
// Appends the HLSL guard band test to the shader being generated. For vertices with
// outPos.w >= u_guardband.z, outPos.w is overwritten with u_guardband.w when the projected
// x lies outside +/-u_guardband.x or the projected y exceeds u_guardband.y.
// NOTE: The check on y is deliberately one-sided (the bottom check is skipped). This fixes
// TOCA but I am dubious about it...
static void WriteGuardBand(char *&p) {
	// Emitted as a single chunk; the adjacent literals concatenate to the exact same shader text.
	WRITE(p,
		" float3 projPos = outPos.xyz / outPos.w; \n"
		" if (outPos.w >= u_guardband.z) {\n"
		" if (abs(projPos.x) > u_guardband.x || projPos.y > u_guardband.y) outPos.w = u_guardband.w;\n"
		" }\n");
}
#endif

void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage lang) {
char *p = buffer;
const u32 vertType = gstate.vertType;
Expand Down Expand Up @@ -128,6 +145,7 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
WRITE(p, "float4x4 u_proj : register(c%i);\n", CONST_VS_PROJ);
// Add all the uniforms we'll need to transform properly.
}
WRITE(p, "float4 u_guardband : register(c%i);\n", CONST_VS_GUARDBAND);

if (enableFog) {
WRITE(p, "float2 u_fogcoef : register(c%i);\n", CONST_VS_FOGCOEF);
Expand Down Expand Up @@ -354,9 +372,11 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
}
}


WRITE(p, "VS_OUT main(VS_IN In) {\n");
WRITE(p, " VS_OUT Out;\n");
#ifdef COLORGUARDBAND
WRITE(p, " float4 colorOverride = float4(1.0, 1.0, 1.0, 1.0);\n");
#endif
if (!useHWTransform) {
// Simple pass-through of vertex data to fragment shader
if (doTexture) {
Expand Down Expand Up @@ -399,10 +419,8 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
}
}
}
if (lang != HLSL_DX9 && guardBandCulling) {
// Guard band culling
WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n");
WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand);
if (lang != HLSL_DX9) {
WriteGuardBand(p);
}
WRITE(p, " Out.gl_Position = outPos;\n");
} else {
Expand Down Expand Up @@ -603,10 +621,7 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
WRITE(p, " float4 outPos = mul(viewPos, u_proj);\n");
}
}
if (lang != HLSL_DX9 && guardBandCulling) {
WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n");
WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand);
}
WriteGuardBand(p);
WRITE(p, " Out.gl_Position = outPos;\n");

// TODO: Declare variables for dots for shade mapping if needed.
Expand Down Expand Up @@ -825,6 +840,9 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
}
}

#ifdef COLORGUARDBAND
WRITE(p, " Out.v_color0 *= colorOverride;\n");
#endif
WRITE(p, " return Out;\n");
WRITE(p, "}\n");
}
Expand Down
1 change: 1 addition & 0 deletions GPU/Directx9/VertexShaderGeneratorDX9.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ namespace DX9 {
#define CONST_VS_LIGHTSPECULAR 75
#define CONST_VS_LIGHTAMBIENT 79
#define CONST_VS_DEPTHRANGE 83
#define CONST_VS_GUARDBAND 84

};
12 changes: 11 additions & 1 deletion GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "ext/native/gfx/GLStateCache.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "GPU/Common/ShaderUniforms.h"
#include "FramebufferManagerGLES.h"

Shader::Shader(const char *code, uint32_t glShaderType, bool useHWTransform)
Expand Down Expand Up @@ -209,6 +210,7 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset");
u_texclamp = glGetUniformLocation(program, "u_texclamp");
u_texclampoff = glGetUniformLocation(program, "u_texclampoff");
u_guardband = glGetUniformLocation(program, "u_guardband");

for (int i = 0; i < 4; i++) {
char temp[64];
Expand Down Expand Up @@ -267,7 +269,8 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND;
if (u_depthRange != -1)
availableUniforms |= DIRTY_DEPTHRANGE;

if (u_guardband != -1)
availableUniforms |= DIRTY_GUARDBAND;
// Looping up to numBones lets us avoid checking u_bone[i]
#ifdef USE_BONE_ARRAY
if (u_bone != -1) {
Expand Down Expand Up @@ -608,6 +611,13 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
if (dirty & DIRTY_TEXMATRIX) {
SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
}

if (dirty & DIRTY_GUARDBAND) {
float gb[4];
ComputeGuardband(gb, 0.0f);
SetFloatUniform4(u_guardband, gb);
}

if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) {
// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
float vpZScale = gstate.getViewportZScale();
Expand Down
3 changes: 3 additions & 0 deletions GPU/GLES/ShaderManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ class LinkedShader {
int u_texclamp;
int u_texclampoff;

// Clipping
int u_guardband;

// Lighting
int u_ambient;
int u_matambientalpha;
Expand Down
22 changes: 17 additions & 5 deletions GPU/GLES/VertexShaderGeneratorGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ enum DoLightComputation {
LIGHT_FULL,
};

// Appends the GLSL guard band test to the shader being generated. For vertices with
// outPos.w >= u_guardband.z, outPos.w is overwritten with u_guardband.w when the projected
// x lies outside +/-u_guardband.x or the projected y is below -u_guardband.y. The y check
// is one-sided on purpose.
static void WriteGuardBand(char *&p) {
	// Emitted as a single chunk; the adjacent literals concatenate to the exact same shader text.
	WRITE(p,
		" vec3 projPos = outPos.xyz / outPos.w; \n"
		" if (outPos.w >= u_guardband.z) {\n"
		" if (abs(projPos.x) > u_guardband.x || projPos.y < -u_guardband.y) outPos.w = u_guardband.w;\n"
		" }\n");
}

// Depth range and viewport
//
Expand All @@ -94,6 +100,7 @@ enum DoLightComputation {
// TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which
// is a bit of a rare configuration, although quite common on mobile.

// NOTE: We are skipping the bottom check. This fixes TOCA but I am dubious about it...

void GenerateVertexShader(const ShaderID &id, char *buffer) {
char *p = buffer;
Expand Down Expand Up @@ -246,6 +253,7 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
WRITE(p, "uniform mat4 u_proj;\n");
// Add all the uniforms we'll need to transform properly.
}
WRITE(p, "uniform vec4 u_guardband;\n");

bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);

Expand Down Expand Up @@ -448,15 +456,17 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
WRITE(p, " v_fogdepth = position.w;\n");
}
if (isModeThrough) {
WRITE(p, " gl_Position = u_proj_through * vec4(position.xyz, 1.0);\n");
WRITE(p, " vec4 outPos = u_proj_through * vec4(position.xyz, 1.0);\n");
} else {
// The viewport is used in this case, so need to compensate for that.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " gl_Position = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n");
WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n");
} else {
WRITE(p, " gl_Position = u_proj * vec4(position.xyz, 1.0);\n");
WRITE(p, " vec4 outPos = u_proj * vec4(position.xyz, 1.0);\n");
}
}
WriteGuardBand(p);
WRITE(p, " gl_Position = outPos;\n");
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
Expand Down Expand Up @@ -648,10 +658,12 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {

// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " gl_Position = depthRoundZVP(u_proj * viewPos);\n");
WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * viewPos);\n");
} else {
WRITE(p, " gl_Position = u_proj * viewPos;\n");
WRITE(p, " vec4 outPos = u_proj * viewPos;\n");
}
WriteGuardBand(p);
WRITE(p, " gl_Position = outPos;\n");

// TODO: Declare variables for dots for shade mapping if needed.

Expand Down
Loading

0 comments on commit 6998732

Please sign in to comment.