diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index c26185885435..1ef4ba51e4ef 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -87,14 +87,15 @@ enum : uint64_t { DIRTY_BEZIERSPLINE = 1ULL << 32, DIRTY_TEXCLAMP = 1ULL << 33, + DIRTY_CULLRANGE = 1ULL << 34, - DIRTY_DEPAL = 1ULL << 34, + DIRTY_DEPAL = 1ULL << 35, // space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index e2dee45c2826..c6ffcf23c51e 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -26,6 +26,49 @@ static void ConvertProjMatrixToD3D11(Matrix4x4 &in) { in.translateAndScale(trans, scale); } +void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) { + // Fills minValues/maxValues with the sorted x/y/z cull bounds; the helpers below account for the projection viewport adjustment when viewport is too large. + auto reverseViewportX = [](float x) { + float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale()); + return pspViewport * (1.0f / gstate_c.vpWidthScale); + }; + auto reverseViewportY = [flipViewport](float y) { + float heightScale = gstate_c.vpHeightScale; + if (flipViewport) { + // For D3D11 and GLES non-buffered: negate the scale so the returned Y is flipped. + heightScale = -heightScale; + } + float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale()); + return pspViewport * (1.0f / heightScale); + }; + auto reverseViewportZ = [hasNegZ](float z) { + float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale()); + // Differs from GLES: depth is 0 to 1, not -1 to 1.
+ float realViewport = (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale); + return hasNegZ ? realViewport : (realViewport * 0.5f + 0.5f); + }; + auto sortPair = [](float a, float b) { + return a > b ? std::make_pair(b, a) : std::make_pair(a, b); + }; + + // The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z. + // Any vertex outside this range (unless depth clamp enabled) is discarded. + auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f)); + auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f)); + auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f)); + // Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard". + float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f; + + minValues[0] = x.first; + minValues[1] = y.first; + minValues[2] = z.first; + minValues[3] = clampEnable; + maxValues[0] = x.second; + maxValues[1] = y.second; + maxValues[2] = z.second; + maxValues[3] = NAN; +} + void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor); @@ -192,6 +235,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->depthRange[3] = viewZInvScale; } + if (dirtyUniforms & DIRTY_CULLRANGE) { + CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false); + } + if (dirtyUniforms & DIRTY_BEZIERSPLINE) { ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v); } diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index dd1e659604b4..be8d2e69f397 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -18,7 +18,7 @@ enum : uint64_t { }; // TODO: Split into two structs, one for software transform and one for hardware transform, to save space. -// 512 bytes.
Probably can't get to 256 (nVidia's UBO alignment). +// Currently 512 bytes. Probably can't get to 256 (nVidia's UBO alignment). // Every line here is a 4-float. struct UB_VS_FS_Base { float proj[16]; @@ -32,6 +32,8 @@ struct UB_VS_FS_Base { float matAmbient[4]; uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. int pad2; int pad3; + float cullRangeMin[4]; + float cullRangeMax[4]; // Fragment data float fogColor[4]; float texEnvColor[4]; @@ -58,6 +60,8 @@ R"( mat4 proj_mtx; uint depal_mask_shift_off_fmt; int pad2; int pad3; + vec4 cullRangeMin; + vec4 cullRangeMax; vec3 fogcolor; vec3 texenv; ivec4 alphacolorref; @@ -84,6 +88,8 @@ R"( float4x4 u_proj; uint u_depal_mask_shift_off_fmt; int pad2; int pad3; + float4 u_cullRangeMin; + float4 u_cullRangeMax; float3 u_fogcolor; float3 u_texenv; uint4 u_alphacolorref; @@ -175,6 +181,8 @@ static const char *cb_vs_bonesStr = R"( float4x3 u_bone[8]; )"; +void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ); + void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport); void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms); void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index a10e56603f06..bdc7ce328ff8 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -36,6 +36,7 @@ #include "GPU/Math3D.h" #include "GPU/GPUState.h" #include "GPU/ge_constants.h" +#include "GPU/Common/ShaderUniforms.h" #include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/DrawEngineDX9.h" #include "GPU/Directx9/FramebufferDX9.h" @@ -314,7 +315,7 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { } const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | -DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | 
DIRTY_DEPTHRANGE | +DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE | DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3; void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { @@ -425,7 +426,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); } - if (dirtyUniforms & DIRTY_DEPTHRANGE) { + if (dirtyUniforms & DIRTY_DEPTHRANGE) { // Depth is [0, 1] mapping to [minz, maxz], not too hard. float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); @@ -447,6 +448,13 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); } + if (dirtyUniforms & DIRTY_CULLRANGE) { + float minValues[4], maxValues[4]; + CalcCullRange(minValues, maxValues, false, false); + VSSetFloatUniform4(CONST_VS_CULLRANGEMIN, minValues); + VSSetFloatUniform4(CONST_VS_CULLRANGEMAX, maxValues); + } + // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index 056bcc158093..1cd0f383c4fa 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -176,6 +176,10 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { WRITE(p, "float4 u_depthRange : register(c%i);\n", CONST_VS_DEPTHRANGE); } + if (!isModeThrough) { + WRITE(p, "float4 u_cullRangeMin : register(c%i);\n", CONST_VS_CULLRANGEMIN); + WRITE(p, "float4 u_cullRangeMax : register(c%i);\n", CONST_VS_CULLRANGEMAX); + } } else { WRITE(p, "cbuffer base : 
register(b0) {\n%s};\n", cb_baseStr); WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", cb_vs_lightsStr); @@ -370,22 +374,22 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage } if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { if (isModeThrough) { - WRITE(p, " Out.gl_Position = mul(u_proj_through, float4(In.position.xyz, 1.0));\n"); + WRITE(p, " float4 outPos = mul(u_proj_through, float4(In.position.xyz, 1.0));\n"); } else { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n"); + WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n"); } else { - WRITE(p, " Out.gl_Position = mul(u_proj, float4(In.position.xyz, 1.0));\n"); + WRITE(p, " float4 outPos = mul(u_proj, float4(In.position.xyz, 1.0));\n"); } } } else { if (isModeThrough) { - WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj_through);\n"); + WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj_through);\n"); } else { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " Out.gl_Position = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n"); + WRITE(p, " float4 outPos = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n"); } else { - WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj);\n"); + WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj);\n"); } } } @@ -577,16 +581,16 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { // Final view and projection transforms. 
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, viewPos));\n"); + WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, viewPos));\n"); } else { - WRITE(p, " Out.gl_Position = mul(u_proj, viewPos);\n"); + WRITE(p, " float4 outPos = mul(u_proj, viewPos);\n"); } } else { // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " Out.gl_Position = depthRoundZVP(mul(viewPos, u_proj));\n"); + WRITE(p, " float4 outPos = depthRoundZVP(mul(viewPos, u_proj));\n"); } else { - WRITE(p, " Out.gl_Position = mul(viewPos, u_proj);\n"); + WRITE(p, " float4 outPos = mul(viewPos, u_proj);\n"); } } @@ -811,6 +815,19 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage } } + if (!isModeThrough) { + WRITE(p, " float3 projPos = outPos.xyz / outPos.w;\n"); + // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. + WRITE(p, " if (u_cullRangeMin.w <= 0.0f || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z"; + WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " outPos.w = u_cullRangeMax.w;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); + } + WRITE(p, " Out.gl_Position = outPos;\n"); + WRITE(p, " return Out;\n"); WRITE(p, "}\n"); } diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.h b/GPU/Directx9/VertexShaderGeneratorDX9.h index e33567992cc9..aecc113ea367 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.h +++ b/GPU/Directx9/VertexShaderGeneratorDX9.h @@ -53,6 +53,8 @@ namespace DX9 { CONST_VS_BONE6 = 71, CONST_VS_BONE7 = 74, CONST_VS_BONE8 = 77, + CONST_VS_CULLRANGEMIN = 80, + CONST_VS_CULLRANGEMAX = 81, }; }; diff --git a/GPU/GLES/ShaderManagerGLES.cpp 
b/GPU/GLES/ShaderManagerGLES.cpp index 484819332e6b..c47b6a184350 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -42,9 +42,10 @@ #include "GPU/Math3D.h" #include "GPU/GPUState.h" #include "GPU/ge_constants.h" +#include "GPU/Common/ShaderUniforms.h" #include "GPU/GLES/ShaderManagerGLES.h" #include "GPU/GLES/DrawEngineGLES.h" -#include "FramebufferManagerGLES.h" +#include "GPU/GLES/FramebufferManagerGLES.h" Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, uint32_t glShaderType, bool useHWTransform, uint32_t attrMask, uint64_t uniformMask) : render_(render), failed_(false), useHWTransform_(useHWTransform), attrMask_(attrMask), uniformMask_(uniformMask) { @@ -116,6 +117,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, else numBones = 0; queries.push_back({ &u_depthRange, "u_depthRange" }); + queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" }); + queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" }); #ifdef USE_BONE_ARRAY queries.push_back({ &u_bone, "u_bone" }); @@ -455,7 +458,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { if (dirty & DIRTY_TEXMATRIX) { SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix); } - if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) { + if (dirty & DIRTY_DEPTHRANGE) { // Since depth is [-1, 1] mapping to [minz, maxz], this is easyish. 
float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); @@ -481,6 +484,12 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; SetFloatUniform4(render_, &u_depthRange, data); } + if (dirty & DIRTY_CULLRANGE) { + float minValues[4], maxValues[4]; + CalcCullRange(minValues, maxValues, g_Config.iRenderingMode == FB_NON_BUFFERED_MODE, true); + SetFloatUniform4(render_, &u_cullRangeMin, minValues); + SetFloatUniform4(render_, &u_cullRangeMax, maxValues); + } if (dirty & DIRTY_STENCILREPLACEVALUE) { float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index fdcd2ce98b0d..5dbfe43b4e25 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -71,6 +71,8 @@ class LinkedShader { int u_texmtx; int u_world; int u_depthRange; // x,y = viewport xscale/xcenter. z,w=clipping minz/maxz (?) + int u_cullRangeMin; + int u_cullRangeMax; #ifdef USE_BONE_ARRAY int u_bone; // array, size is numBones diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index c3a6427d3b7d..c09f879a55b9 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -87,6 +87,10 @@ enum DoLightComputation { // // Now, the regular machinery will take over and do the calculation again. // +// Depth is not clipped to the viewport, but does clip to "minz" and "maxz". It may also be clamped +// to 0 and 65535 if a depth clamping/clipping flag is set (x/y clipping is performed only if depth +// needs to be clamped.) +// // All this above is for full transform mode. // In through mode, the Z coordinate just goes straight through and there is no perspective division. // We simulate this of course with pretty much an identity matrix. Rounding Z becomes very easy. 
@@ -335,6 +339,12 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, *uniformMask |= DIRTY_DEPTHRANGE; } + if (!isModeThrough) { + WRITE(p, "uniform highp vec4 u_cullRangeMin;\n"); + WRITE(p, "uniform highp vec4 u_cullRangeMax;\n"); + *uniformMask |= DIRTY_CULLRANGE; + } + WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, varying); if (lmode) { WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, varying); @@ -472,13 +482,13 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = u_proj_through * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj_through * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = u_proj * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj * vec4(position.xyz, 1.0);\n"); } } } else { @@ -671,9 +681,9 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * viewPos);\n"); } else { - WRITE(p, " gl_Position = u_proj * viewPos;\n"); + WRITE(p, " vec4 outPos = u_proj * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. 
@@ -898,5 +908,19 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n"); } + + if (!isModeThrough) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); + // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. No 'f' suffix on literals: GLSL ES 1.00 does not accept it. + WRITE(p, " if (u_cullRangeMin.w <= 0.0 || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z"; + WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " outPos.w = u_cullRangeMax.w;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); + } + WRITE(p, " gl_Position = outPos;\n"); + WRITE(p, "}\n"); } diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fed71166aed3..a9f24a31b0d7 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -188,17 +188,17 @@ const CommonCommandTableEntry commonCommandTable[] = { // Viewport.
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE }, { GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE }, + { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | 
DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE }, // Z clip - { GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE }, // Region { GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 68bc711a1da5..282b602dd82e 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -239,15 +239,22 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag } if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) { - if (gstate.isModeClear()) { + if (gstate.isModeClear() || gstate.isModeThrough()) { key.cullMode = VK_CULL_MODE_NONE; - // TODO: Or does it always clamp? + // TODO: Might happen in clear mode if not through... key.depthClampEnable = false; } else { // Set cull - bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled(); + bool wantCull = prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled(); key.cullMode = wantCull ? (gstate.getCullMode() ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_BACK_BIT) : VK_CULL_MODE_NONE; - key.depthClampEnable = gstate.isDepthClampEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP); + if (gstate.getDepthRangeMin() == 0 || gstate.getDepthRangeMax() == 65535) { + // TODO: Still has a bug where we clamp to depth range if one is not the full range. + // But the alternate is not clamping in either direction... 
+ key.depthClampEnable = gstate.isDepthClampEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP); + } else { + // We just want to clip in this case, the clamp would be clipped anyway. + key.depthClampEnable = false; + } } } diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index e901c4390043..c180631e9e27 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -317,13 +317,13 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * vec4(position.xyz, 1.0);\n"); } } } else { @@ -472,9 +472,9 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * viewPos);\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. 
@@ -694,6 +694,20 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n"); } + + if (!isModeThrough) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); + // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. + WRITE(p, " if (base.cullRangeMin.w <= 0.0f || (projPos.z >= base.cullRangeMin.z && projPos.z <= base.cullRangeMax.z)) {\n"); + const char *outMin = "projPos.x < base.cullRangeMin.x || projPos.y < base.cullRangeMin.y || projPos.z < base.cullRangeMin.z"; + const char *outMax = "projPos.x > base.cullRangeMax.x || projPos.y > base.cullRangeMax.y || projPos.z > base.cullRangeMax.z"; + WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " outPos.w = base.cullRangeMax.w;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); + } + WRITE(p, " gl_Position = outPos;\n"); + WRITE(p, "}\n"); return true; }