diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 5244e4f83a18..faa0c4926267 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -137,6 +137,7 @@ bool IsColorTestTriviallyTrue() { } } +// TODO: Pack into 16-bit integer. const bool nonAlphaSrcFactors[16] = { true, // GE_SRCBLEND_DSTCOLOR, true, // GE_SRCBLEND_INVDSTCOLOR, diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 07d851c1de58..0528d90f7d16 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -75,6 +75,8 @@ enum : uint64_t { DIRTY_TEXCLAMP = 1ULL << 19, DIRTY_DEPTHRANGE = 1ULL << 19, + DIRTY_GUARDBAND = 1ULL << 20, + DIRTY_WORLDMATRIX = 1ULL << 21, DIRTY_VIEWMATRIX = 1ULL << 22, DIRTY_TEXMATRIX = 1ULL << 23, diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 20dd1e1b1494..3a9af5901701 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -23,6 +23,28 @@ static void ConvertProjMatrixToD3D11(Matrix4x4 &in) { in.translateAndScale(trans, scale); } +void ComputeGuardband(float gb[4], float zmin) { + float vpWidth = fabsf(gstate_c.vpWidth); + float vpHeight = fabsf(gstate_c.vpHeight); + // Avoid bad values during initialization. Doubt these are really needed. + if (vpWidth == 0.0) + vpWidth = 480; + if (vpHeight == 0.0) + vpHeight = 272; + + // We assume a symmetric guardband, even though it's not entirely correct to do so - but nearly everything does it + // this way and we have space for the NAN in the uniform. + // We also assume that everything behind the near clipping plane gets clipped and will thus not in reality + // exceed the guardband. This is a bit rough but should be ok. 
+ float offsetX = gstate.getOffsetX(); + float offsetY = gstate.getOffsetY(); + float nearPlane = 0.0; + gb[0] = (2048.0f / (vpWidth*0.5f)); + gb[1] = (2048.0f / (vpHeight*0.5f)); + gb[2] = zmin; + gb[3] = NAN; +} + void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) { if (dirtyUniforms & DIRTY_TEXENV) { Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor); @@ -34,7 +56,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask()); } if (dirtyUniforms & DIRTY_FOGCOLOR) { - Uint8x3ToFloat4_Alpha(ub->fogColor, gstate.fogcolor, NAN); + Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA()); @@ -56,7 +78,11 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW; ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH; } - + if (dirtyUniforms & DIRTY_GUARDBAND) { + float gb[4]; + ComputeGuardband(gb, 0.0f); + memcpy(ub->guardband, gb, sizeof(float) * 4); + } if (dirtyUniforms & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); @@ -98,6 +124,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) { proj_through = proj_through * g_display_rot_matrix; } + // proj_through.translateAndScale(Vec3(0, 0, 0), Vec3(1.0f / debugscale, 1.0f / debugscale, 0)); CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr()); } diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 7db09bd7244e..c1b04e8bf368 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -10,7 +10,7 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = DIRTY_WORLDMATRIX | 
DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | - DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | + DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | DIRTY_GUARDBAND | DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE, DIRTY_LIGHT_UNIFORMS = @@ -34,6 +34,7 @@ struct UB_VS_FS_Base { int spline_count_v; int spline_type_u; int spline_type_v; + float guardband[4]; // Fragment data float fogColor[4]; float texEnvColor[4]; @@ -59,7 +60,9 @@ R"( mat4 proj_mtx; int spline_count_v; int spline_type_u; int spline_type_v; - vec3 fogcolor; float nanValue; + vec4 guardband; + // Fragment + vec3 fogcolor; vec3 texenv; ivec4 alphacolorref; ivec4 alphacolormask; @@ -84,7 +87,9 @@ R"( float4x4 u_proj; int u_spline_count_v; int u_spline_type_u; int u_spline_type_v; - float3 u_fogcolor; float nanValue; + float4 u_guardband; + // Fragment + float3 u_fogcolor; float3 u_texenv; uint4 u_alphacolorref; uint4 u_alphacolormask; @@ -184,3 +189,5 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms); void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms); +// Shared helper functions +void ComputeGuardband(float gb[4], float zmin); diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 78a9b5e0ff41..01c4655bc8f9 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -427,10 +427,11 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { if (rasterIter == rasterCache_.end()) { D3D11_RASTERIZER_DESC desc{}; desc.CullMode = (D3D11_CULL_MODE)(keys_.raster.cullMode); + // desc.FillMode = gstate.isModeThrough() ? 
D3D11_FILL_SOLID : D3D11_FILL_WIREFRAME; desc.FillMode = D3D11_FILL_SOLID; desc.ScissorEnable = TRUE; desc.FrontCounterClockwise = TRUE; - desc.DepthClipEnable = TRUE; + desc.DepthClipEnable = TRUE; // FALSE ASSERT_SUCCESS(device_->CreateRasterizerState(&desc, &rs)); rasterCache_.insert(std::pair(keys_.raster.value, rs)); } else { diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 43ef78d294a8..6aaf8e1663b9 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -34,6 +34,7 @@ #include "GPU/Math3D.h" #include "GPU/GPUState.h" #include "GPU/ge_constants.h" +#include "GPU/Common/ShaderUniforms.h" #include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/DrawEngineDX9.h" #include "GPU/Directx9/FramebufferDX9.h" @@ -394,6 +395,11 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { #endif VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } + if (dirtyUniforms & DIRTY_GUARDBAND) { + float gb[4]; + ComputeGuardband(gb, 0.0f); + VSSetFloatUniform4(CONST_VS_GUARDBAND, gb); + } // TODO: Could even set all bones in one go if they're all dirty. #ifdef USE_BONE_ARRAY if (u_bone != 0) { diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index 4313123d7372..dd9404668c87 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -37,15 +37,12 @@ namespace DX9 { -// The PSP does not have a proper triangle clipper, but it does have a guard band and can rasterize rather large +// The PSP does not have a proper triangle clipper on the sides. It does have one for the front plane. +// It has a guard band though and can rasterize rather large // triangles that go outside the viewport. However, there are limits, and it will drop triangles that are very // large. 
Some games appear to draw broken geometry, probably game bugs that were never discovered because the PSP // would drop the geometry, including Parappa The Rapper in an obscure case and Outrun. Try to get rid of those // triangles by setting the W of one of the vertices to NaN if they are discovered. -const bool guardBandCulling = true; -// Not sure what a good value for this is, it should probably depend on the framebuffer size. -// Let's be conservative. -const float guardBand = 64.0f; static const char * const boneWeightAttrDecl[9] = { "#ERROR#", @@ -65,6 +62,26 @@ enum DoLightComputation { LIGHT_FULL, }; +// #define COLORGUARDBAND + +#ifdef COLORGUARDBAND +// Coloring debug version +static void WriteGuardBand(char *&p) { + WRITE(p, " float3 projPos = outPos.xyz / outPos.w; \n"); + WRITE(p, " if (outPos.w >= u_guardband.z) {\n"); + WRITE(p, " if (abs(projPos.x) > u_guardband.x || projPos.y > u_guardband.y) colorOverride.g = 0.0;\n");//outPos.w = u_guardband.w;\n"); + WRITE(p, " } else { colorOverride.b = 0.0; } \n"); +} +#else +// NOTE: We are skipping the bottom check. This fixes TOCA but I am dubious about it... +static void WriteGuardBand(char *&p) { + WRITE(p, " float3 projPos = outPos.xyz / outPos.w; \n"); + WRITE(p, " if (outPos.w >= u_guardband.z) {\n"); + WRITE(p, " if (abs(projPos.x) > u_guardband.x || projPos.y > u_guardband.y) outPos.w = u_guardband.w;\n"); + WRITE(p, " }\n"); +} +#endif + void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage lang) { char *p = buffer; const u32 vertType = gstate.vertType; @@ -128,6 +145,7 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l WRITE(p, "float4x4 u_proj : register(c%i);\n", CONST_VS_PROJ); // Add all the uniforms we'll need to transform properly. 
} + WRITE(p, "float4 u_guardband : register(c%i);\n", CONST_VS_GUARDBAND); if (enableFog) { WRITE(p, "float2 u_fogcoef : register(c%i);\n", CONST_VS_FOGCOEF); @@ -354,9 +372,11 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l } } - WRITE(p, "VS_OUT main(VS_IN In) {\n"); WRITE(p, " VS_OUT Out;\n"); +#ifdef COLORGUARDBAND + WRITE(p, " float4 colorOverride = float4(1.0, 1.0, 1.0, 1.0);\n"); +#endif if (!useHWTransform) { // Simple pass-through of vertex data to fragment shader if (doTexture) { @@ -399,10 +419,8 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l } } } - if (lang != HLSL_DX9 && guardBandCulling) { - // Guard band culling - WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n"); - WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand); + if (lang != HLSL_DX9) { + WriteGuardBand(p); } WRITE(p, " Out.gl_Position = outPos;\n"); } else { @@ -603,10 +621,7 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l WRITE(p, " float4 outPos = mul(viewPos, u_proj);\n"); } } - if (lang != HLSL_DX9 && guardBandCulling) { - WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n"); - WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand); - } + WriteGuardBand(p); WRITE(p, " Out.gl_Position = outPos;\n"); // TODO: Declare variables for dots for shade mapping if needed. 
@@ -825,6 +840,9 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l } } +#ifdef COLORGUARDBAND + WRITE(p, " Out.v_color0 *= colorOverride;\n"); +#endif WRITE(p, " return Out;\n"); WRITE(p, "}\n"); } diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.h b/GPU/Directx9/VertexShaderGeneratorDX9.h index 21601b989d95..1aad270cb898 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.h +++ b/GPU/Directx9/VertexShaderGeneratorDX9.h @@ -57,5 +57,6 @@ namespace DX9 { #define CONST_VS_LIGHTSPECULAR 75 #define CONST_VS_LIGHTAMBIENT 79 #define CONST_VS_DEPTHRANGE 83 +#define CONST_VS_GUARDBAND 84 }; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 48847ed8f50d..17ad9ea21d0c 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -40,6 +40,7 @@ #include "ext/native/gfx/GLStateCache.h" #include "GPU/GLES/ShaderManagerGLES.h" #include "GPU/GLES/DrawEngineGLES.h" +#include "GPU/Common/ShaderUniforms.h" #include "FramebufferManagerGLES.h" Shader::Shader(const char *code, uint32_t glShaderType, bool useHWTransform) @@ -209,6 +210,7 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs, u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); u_texclamp = glGetUniformLocation(program, "u_texclamp"); u_texclampoff = glGetUniformLocation(program, "u_texclampoff"); + u_guardband = glGetUniformLocation(program, "u_guardband"); for (int i = 0; i < 4; i++) { char temp[64]; @@ -267,7 +269,8 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs, if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND; if (u_depthRange != -1) availableUniforms |= DIRTY_DEPTHRANGE; - + if (u_guardband != -1) + availableUniforms |= DIRTY_GUARDBAND; // Looping up to numBones lets us avoid checking u_bone[i] #ifdef USE_BONE_ARRAY if (u_bone != -1) { @@ -608,6 +611,13 @@ void LinkedShader::UpdateUniforms(u32 
vertType, const ShaderID &vsid) { if (dirty & DIRTY_TEXMATRIX) { SetMatrix4x3(u_texmtx, gstate.tgenMatrix); } + + if (dirty & DIRTY_GUARDBAND) { + float gb[4]; + ComputeGuardband(gb, 0.0f); + SetFloatUniform4(u_guardband, gb); + } + if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) { // Since depth is [-1, 1] mapping to [minz, maxz], this is easyish. float vpZScale = gstate.getViewportZScale(); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index eb0755c491c8..41740c598d44 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -96,6 +96,9 @@ class LinkedShader { int u_texclamp; int u_texclampoff; + // Clipping + int u_guardband; + // Lighting int u_ambient; int u_matambientalpha; diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index d4812caf5e59..9baac3963f12 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -68,6 +68,12 @@ enum DoLightComputation { LIGHT_FULL, }; +static void WriteGuardBand(char *&p) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w; \n"); + WRITE(p, " if (outPos.w >= u_guardband.z) {\n"); + WRITE(p, " if (abs(projPos.x) > u_guardband.x || projPos.y < -u_guardband.y) outPos.w = u_guardband.w;\n"); + WRITE(p, " }\n"); +} // Depth range and viewport // @@ -94,6 +100,7 @@ enum DoLightComputation { // TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which // is a bit of a rare configuration, although quite common on mobile. +// NOTE: We are skipping the bottom check. This fixes TOCA but I am dubious about it... void GenerateVertexShader(const ShaderID &id, char *buffer) { char *p = buffer; @@ -246,6 +253,7 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) { WRITE(p, "uniform mat4 u_proj;\n"); // Add all the uniforms we'll need to transform properly. 
} + WRITE(p, "uniform vec4 u_guardband;\n"); bool scaleUV = !throughmode && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); @@ -448,15 +456,17 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) { WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = u_proj_through * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj_through * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = u_proj * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj * vec4(position.xyz, 1.0);\n"); } } + WriteGuardBand(p); + WRITE(p, " gl_Position = outPos;\n"); } else { // Step 1: World Transform / Skinning if (!enableBones) { @@ -648,10 +658,12 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) { // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * viewPos);\n"); } else { - WRITE(p, " gl_Position = u_proj * viewPos;\n"); + WRITE(p, " vec4 outPos = u_proj * viewPos;\n"); } + WriteGuardBand(p); + WRITE(p, " gl_Position = outPos;\n"); // TODO: Declare variables for dots for shade mapping if needed. diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 021a5ee01640..7d973b2c6acf 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -91,8 +91,6 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXENVCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXENV }, // Simple render state changes. Handled in StateMapping.cpp. 
- { GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE }, - { GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE }, { GE_CMD_CULL, FLAG_FLUSHBEFOREONCHANGE }, { GE_CMD_CULLFACEENABLE, FLAG_FLUSHBEFOREONCHANGE }, { GE_CMD_DITHERENABLE, FLAG_FLUSHBEFOREONCHANGE }, @@ -170,12 +168,14 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE }, // Viewport. - { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS }, - { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS }, - { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS }, - { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS }, + { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_GUARDBAND}, + { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_GUARDBAND}, + { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_GUARDBAND}, + { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_GUARDBAND}, { GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX }, { GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX }, + { GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_GUARDBAND }, + { GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_GUARDBAND }, { GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE }, // Z clip diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 3aa802cb85c3..15fd80e0e584 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -22,8 +22,12 @@ void PipelineManagerVulkan::Clear() { // This could also be an opportunity to store the whole cache to 
disk. Will need to also // store the keys. for (auto &iter : pipelines_) { - vulkan_->Delete().QueueDeletePipeline(iter.second->pipeline); - delete iter.second; + if (iter.second) { + vulkan_->Delete().QueueDeletePipeline(iter.second->pipeline); + delete iter.second; + } else { + Crash(); // null pipeline was created somehow. + } } pipelines_.clear(); } @@ -307,6 +311,9 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layo VulkanPipeline *pipeline = CreateVulkanPipeline( vulkan_->GetDevice(), pipelineCache_, layout, vulkan_->GetSurfaceRenderPass(), rasterKey, vtxDec, vs, fs, useHwTransform); + if (!pipeline) { + Crash(); + } pipelines_[key] = pipeline; return pipeline; } diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index ec2d62baf35a..8716522344c0 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -22,6 +22,7 @@ #include #include "base/logging.h" +#include "base/stringutil.h" #include "math/lin/matrix4x4.h" #include "math/math_util.h" #include "math/dataconv.h" @@ -62,7 +63,7 @@ VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, ShaderID id, c #ifdef SHADERLOG OutputDebugStringA("Messages:\n"); OutputDebugStringA(errorMessage.c_str()); - OutputDebugStringA(code); + OutputDebugStringA(LineNumberString(code).c_str()); #endif Reporting::ReportMessage("Vulkan error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code); } else { diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index b3e9ae7e13de..e7f7307681f1 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -47,11 +47,7 @@ static const char *vulkan_glsl_preamble = // texcoord = 2 // fog = 3 - - - #undef WRITE - #define WRITE p+=sprintf static const char * const boneWeightDecl[9] = { @@ -72,6 +68,13 @@ enum DoLightComputation { LIGHT_FULL, }; +static void 
WriteGuardBand(char *&p) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w; \n"); + WRITE(p, " if (outPos.w >= base.guardband.z) {\n"); + WRITE(p, " if (abs(projPos.x) > base.guardband.x || projPos.y < -base.guardband.y) outPos.w = base.guardband.w;\n"); + WRITE(p, " }\n"); +} + // Depth range and viewport // // After the multiplication with the projection matrix, we have a 4D vector in clip space. @@ -317,15 +320,17 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * vec4(position.xyz, 1.0);\n"); } } + WriteGuardBand(p); + WRITE(p, " gl_Position = outPos;\n"); } else { // Step 1: World Transform / Skinning if (!enableBones) { @@ -469,10 +474,12 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * viewPos);\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * viewPos;\n"); } + WriteGuardBand(p); + WRITE(p, " gl_Position = outPos;\n"); // TODO: Declare variables for dots for shade mapping if needed.