Skip to content

Commit

Permalink
Restore fragmentshader ID flags double and texalpha. Add a ubershader…
Browse files Browse the repository at this point in the history
… mode that uses dynamic state.
  • Loading branch information
hrydgard committed May 23, 2023
1 parent 5adbecc commit 381a99e
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 23 deletions.
37 changes: 29 additions & 8 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);

bool ubershader = id.Bit(FS_BIT_UBERSHADER);
// ubershader-controlled bits. If ubershader is on, these will not be used below (and will be false).
bool useTexAlpha = id.Bit(FS_BIT_TEXALPHA);
bool enableColorDouble = id.Bit(FS_BIT_DOUBLE_COLOR);

if (texture3D && arrayTexture) {
*errorString = "Invalid combination of 3D texture and array texture, shouldn't happen";
return false;
Expand Down Expand Up @@ -264,7 +269,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
}
WRITE(p, "float2 u_texNoAlphaMul : register(c%i);\n", CONST_PS_TEX_NO_ALPHA_MUL);
if (ubershader) {
WRITE(p, "float2 u_texNoAlphaMul : register(c%i);\n", CONST_PS_TEX_NO_ALPHA_MUL);
}
}
if (enableFog) {
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
Expand Down Expand Up @@ -363,7 +370,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "uniform sampler2D tex;\n");
}
*uniformMask |= DIRTY_TEX_ALPHA_MUL;
WRITE(p, "uniform vec2 u_texNoAlphaMul;\n");
if (ubershader) {
WRITE(p, "uniform vec2 u_texNoAlphaMul;\n");
}
}

if (readFramebufferTex) {
Expand Down Expand Up @@ -842,7 +851,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " vec4 p = v_color0;\n");

if (texFunc != GE_TEXFUNC_REPLACE) {
WRITE(p, " t.a = max(t.a, u_texNoAlphaMul.x);\n");
if (ubershader) {
WRITE(p, " t.a = max(t.a, u_texNoAlphaMul.x);\n");
} else if (!useTexAlpha) {
WRITE(p, " t.a = 1.0;\n");
}
}

switch (texFunc) {
Expand All @@ -857,7 +870,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 r = t;\n");
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlphaMul.x);\n");
if (ubershader) {
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlphaMul.x);\n");
} else if (!useTexAlpha) {
WRITE(p, " r.a = p.a;\n");
}
WRITE(p, " vec4 v = r%s;\n", secondary);
break;
case GE_TEXFUNC_ADD:
Expand All @@ -876,10 +893,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
*uniformMask |= DIRTY_TEX_ALPHA_MUL;

// We only need a clamp if the color will be further processed. Otherwise the hardware color conversion will clamp for us.
if (enableFog || enableColorTest || replaceBlend != REPLACE_BLEND_NO || simulateLogicOpType != LOGICOPTYPE_NORMAL || colorWriteMask || blueToAlpha) {
WRITE(p, " v.rgb = clamp(v.rgb * u_texNoAlphaMul.y, 0.0, 1.0);\n");
} else {
WRITE(p, " v.rgb *= u_texNoAlphaMul.y;\n");
if (ubershader) {
if (enableFog || enableColorTest || replaceBlend != REPLACE_BLEND_NO || simulateLogicOpType != LOGICOPTYPE_NORMAL || colorWriteMask || blueToAlpha) {
WRITE(p, " v.rgb = clamp(v.rgb * u_texNoAlphaMul.y, 0.0, 1.0);\n");
} else {
WRITE(p, " v.rgb *= u_texNoAlphaMul.y;\n");
}
} else if (enableColorDouble) {
p.C(" v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
}
} else {
// No texture mapping
Expand Down
17 changes: 16 additions & 1 deletion GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,11 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj ";
if (id.Bit(FS_BIT_ENABLE_FOG)) desc << "Fog ";
if (id.Bit(FS_BIT_LMODE)) desc << "LM ";
if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha ";
if (id.Bit(FS_BIT_DOUBLE_COLOR)) desc << "Double ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
if (id.Bit(FS_BIT_UBERSHADER)) desc << "FragUber ";
switch ((ShaderDepalMode)id.Bits(FS_BIT_SHADER_DEPAL_MODE, 2)) {
case ShaderDepalMode::OFF: break;
case ShaderDepalMode::NORMAL: desc << "Depal "; break;
Expand Down Expand Up @@ -285,9 +288,14 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
bool enableFog = gstate.isFogEnabled() && !isModeThrough;
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDouble = gstate.isColorDoublingEnabled();
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode()));
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;

bool enableTexAlpha = gstate.isTextureAlphaUsed();

bool uberShader = gstate_c.Use(GPU_USE_FRAGMENT_UBERSHADER);

ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode;

bool colorWriteMask = pipelineState.maskState.applyFramebufferRead;
Expand Down Expand Up @@ -329,7 +337,14 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_TEST_DISCARD_TO_ZERO, !NeedsTestDiscard());
}

id.SetBit(FS_BIT_ENABLE_FOG, enableFog);
id.SetBit(FS_BIT_ENABLE_FOG, enableFog); // TODO: Will be moved back to the ubershader.

id.SetBit(FS_BIT_UBERSHADER, uberShader);
if (!uberShader) {
id.SetBit(FS_BIT_TEXALPHA, enableTexAlpha);
id.SetBit(FS_BIT_DOUBLE_COLOR, enableColorDouble);
}

id.SetBit(FS_BIT_DO_TEXTURE_PROJ, doTextureProjection);

// 2 bits
Expand Down
16 changes: 10 additions & 6 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,36 +68,40 @@ enum FShaderBit : uint8_t {
FS_BIT_CLEARMODE = 0,
FS_BIT_DO_TEXTURE = 1,
FS_BIT_TEXFUNC = 2, // 3 bits
FS_BIT_DO_TEXTURE_PROJ = 5,
FS_BIT_DOUBLE_COLOR = 5, // Not used with FS_BIT_UBERSHADER
FS_BIT_3D_TEXTURE = 6,
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,
FS_BIT_CLAMP_T = 9,
FS_BIT_FLATSHADE = 10,
FS_BIT_TEXALPHA = 10, // Not used with FS_BIT_UBERSHADER
FS_BIT_LMODE = 11,
FS_BIT_ALPHA_TEST = 12,
FS_BIT_ALPHA_TEST_FUNC = 13, // 3 bits
FS_BIT_ALPHA_AGAINST_ZERO = 16,
FS_BIT_COLOR_TEST = 17,
FS_BIT_COLOR_TEST_FUNC = 18, // 2 bits
FS_BIT_COLOR_AGAINST_ZERO = 20,
FS_BIT_ENABLE_FOG = 21,
FS_BIT_SAMPLE_ARRAY_TEXTURE = 22, // For multiview, framebuffers are array textures and we need to sample the two layers correctly.
FS_BIT_STEREO = 23,
FS_BIT_ENABLE_FOG = 21, // Still set alongside FS_BIT_UBERSHADER for now; fog is not yet handled dynamically by the ubershader.
FS_BIT_DO_TEXTURE_PROJ = 22,
// 1 free bit
FS_BIT_STENCIL_TO_ALPHA = 24, // 2 bits
FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE = 26, // 4 bits (ReplaceAlphaType)
FS_BIT_SIMULATE_LOGIC_OP_TYPE = 30, // 2 bits
FS_BIT_REPLACE_BLEND = 32, // 3 bits (ReplaceBlendType)
FS_BIT_BLENDEQ = 35, // 3 bits
FS_BIT_BLENDFUNC_A = 38, // 4 bits
FS_BIT_BLENDFUNC_B = 42, // 4 bits
FS_BIT_USE_FRAMEBUFFER_FETCH = 46,
FS_BIT_FLATSHADE = 46,
FS_BIT_BGRA_TEXTURE = 47,
FS_BIT_TEST_DISCARD_TO_ZERO = 48,
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_REPLACE_LOGIC_OP = 51, // 4 bits. GE_LOGIC_COPY means no-op/off.
FS_BIT_SHADER_DEPAL_MODE = 55, // 2 bits (ShaderDepalMode)
FS_BIT_SAMPLE_ARRAY_TEXTURE = 57, // For multiview, framebuffers are array textures and we need to sample the two layers correctly.
FS_BIT_STEREO = 58,
FS_BIT_USE_FRAMEBUFFER_FETCH = 59,
FS_BIT_UBERSHADER = 60,
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
13 changes: 6 additions & 7 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ struct alignas(16) UB_VS_FS_Base {
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
uint32_t colorWriteMask; float mipBias;
// Fragment data
float texNoAlpha; float texMul; float padding[2]; // this vec4 will hold ubershader stuff. We won't use integer flags in the fragment shader.
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
float blendFixA[3]; float stencilReplaceValue;
float blendFixB[3]; float rotation;
float texClamp[4];
float texClampOffset[2]; float fogCoef[2];
float texNoAlpha; float texMul; float padding[2];
float blendFixA[3]; float stencilReplaceValue;
float blendFixB[3]; float rotation;
// VR stuff is to go here, later. For normal drawing, we can then get away
// with just uploading the first 448 bytes of the struct (up to and including fogCoef).
};
Expand All @@ -59,14 +59,13 @@ R"( mat4 u_proj;
uint u_depal_mask_shift_off_fmt;
uint u_colorWriteMask;
float u_mipBias;
vec2 u_texNoAlphaMul; float pad1; float pad2;
vec3 u_fogcolor; uint u_alphacolorref;
vec3 u_texenv; uint u_alphacolormask;
vec4 u_texclamp;
vec2 u_texclampoff; vec2 u_fogcoef;
vec3 u_blendFixA; float u_stencilReplaceValue;
vec3 u_blendFixB; float u_rotation;
vec4 u_texclamp;
vec2 u_texclampoff;
vec2 u_fogcoef;
vec2 u_texNoAlphaMul; float pad1; float pad2;
)";

// 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand
Expand Down
2 changes: 2 additions & 0 deletions GPU/D3D11/GPU_D3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ u32 GPU_D3D11::CheckGPUFeatures() const {
features |= GPU_USE_16BIT_FORMATS;
}

features |= GPU_USE_FRAGMENT_UBERSHADER;

return CheckGPUFeaturesLate(features);
}

Expand Down
3 changes: 3 additions & 0 deletions GPU/Directx9/GPU_DX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ u32 GPU_DX9::CheckGPUFeatures() const {
// So we cannot incorrectly use the viewport transform as the depth range on Direct3D.
features |= GPU_USE_ACCURATE_DEPTH;

// DX9 GPUs probably benefit more than they lose from this, though this might need to become a per-vendor check.
features |= GPU_USE_FRAGMENT_UBERSHADER;

return CheckGPUFeaturesLate(features);
}

Expand Down
3 changes: 3 additions & 0 deletions GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ void DrawEngineGLES::DoFlush() {
gpuStats.numDrawCalls += numDrawCalls;
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;

// TODO: When the next flush has the same vertex format, we can continue with the same offset in the vertex buffer,
// and start indexing from a higher value. This is very friendly to OpenGL (where we can't rely on baseindex if we
// wanted to avoid rebinding the vertex input every time).
indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls = 0;
Expand Down
4 changes: 4 additions & 0 deletions GPU/GLES/GPU_GLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ u32 GPU_GLES::CheckGPUFeatures() const {
}
}

if (gl_extensions.GLES3) {
features |= GPU_USE_FRAGMENT_UBERSHADER;
}

return features;
}

Expand Down
7 changes: 7 additions & 0 deletions GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,13 @@ void GPUCommonHW::UpdateCmdInfo() {
cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_LIGHT_CONTROL);
cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_VERTEXSHADER_STATE);
}

if (gstate_c.Use(GPU_USE_FRAGMENT_UBERSHADER)) {
// Texfunc controls both texalpha and doubling. The rest is not dynamic yet so can't remove fragment shader dirtying.
cmdInfo_[GE_CMD_TEXFUNC].AddDirty(DIRTY_TEX_ALPHA_MUL);
} else {
cmdInfo_[GE_CMD_TEXFUNC].RemoveDirty(DIRTY_TEX_ALPHA_MUL);
}
}

void GPUCommonHW::BeginFrame() {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUState.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ enum {
GPU_USE_VS_RANGE_CULLING = FLAG_BIT(3),
GPU_USE_BLEND_MINMAX = FLAG_BIT(4),
GPU_USE_LOGIC_OP = FLAG_BIT(5),
// Bit 6 is free.
GPU_USE_FRAGMENT_UBERSHADER = FLAG_BIT(6),
GPU_USE_TEXTURE_NPOT = FLAG_BIT(7),
GPU_USE_ANISOTROPY = FLAG_BIT(8),
GPU_USE_CLEAR_RAM_HACK = FLAG_BIT(9),
Expand Down
4 changes: 4 additions & 0 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ u32 GPU_Vulkan::CheckGPUFeatures() const {
features &= ~GPU_USE_FRAMEBUFFER_FETCH;
// }

// Only a few low-power GPUs should probably avoid this.
// Let's figure that out later.
features |= GPU_USE_FRAGMENT_UBERSHADER;

// Attempt to workaround #17386
if (draw_->GetBugs().Has(Draw::Bugs::UNIFORM_INDEXING_BROKEN)) {
features &= ~GPU_USE_LIGHT_UBERSHADER;
Expand Down

0 comments on commit 381a99e

Please sign in to comment.