Add some shaders and update others. (#3234)

- Add fxaa.fx, aa-shader-40.fx, bilateral.fx; - Update geom.fx, crt-geom.fx, bicubic.fx, lanczos3.fx, super-xbr.fx.
stenzek · Jun 27, 2024 · ad27f8b · ad27f8b
1 parent ebf50ed
commit ad27f8b
Show file tree

Hide file tree

Showing 8 changed files with 581 additions and 38 deletions.
diff --git a/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx b/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx
@@ -0,0 +1,104 @@
+#include "ReShade.fxh"
+
+/*
+   Copyright (C) 2016 guest(r) - guest.r@gmail.com
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*/
+
+uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; // host-supplied; presumably 1/native resolution in UV units -- TODO confirm
+// Back-buffer sampler with clamped addressing and point (nearest) filtering.
+sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
+// All-ones weights: dot(abs(x), dt) adds up the per-channel absolute differences.
+static const float3 dt = float3(1.0,1.0,1.0);
+
+float3 texture2d(sampler2D tex, float2 coord, float4 yx) {
+    // Edge-aware tap: samples the four corners coord + yx.{zw, xw, xy, zy} and blends them.
+    float3 cornerZW = tex2D(tex, coord + yx.zw).xyz;
+    float3 cornerXW = tex2D(tex, coord + yx.xw).xyz;
+    float3 cornerXY = tex2D(tex, coord + yx.xy).xyz;
+    float3 cornerZY = tex2D(tex, coord + yx.zy).xyz;
+    // Summed per-channel contrast across each diagonal; the 0.001 bias keeps the divisor positive.
+    float contrastA = dot(abs(cornerZW - cornerXY), dt) + 0.001;
+    float contrastB = dot(abs(cornerZY - cornerXW), dt) + 0.001;
+    // Each diagonal pair is weighted by the contrast of the opposite diagonal.
+    return 0.5 * (contrastB * (cornerZW + cornerXY) + contrastA * (cornerZY + cornerXW)) / (contrastA + contrastB);
+}
+
+
+
+float4 PS_aa_shader_40(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
+{ // Pixel shader: rebuilds the output colour from a 3x3 neighbourhood of edge-aware taps (see texture2d).
+    // Calculating texel coordinates: the working grid cell is NormalizedNativePixelSize / 4.
+    float2 size     = 4.0 / NormalizedNativePixelSize;
+    float2 inv_size = 1.0 / size;
+    // yx packs (+cell.x, +cell.y, -cell.x, -cell.y); texture2d() uses it for its diagonal taps.
+    float4 yx = float4(inv_size, -inv_size);
+    // Position measured in grid cells.
+    float2 OGL2Pos = vTexCoord * size;
+    // fp: fractional position inside the cell; dx/dy/g1/g2: one-cell steps along the axes and both diagonals.
+    float2 fp = frac(OGL2Pos);
+    float2 dx = float2(inv_size.x,0.0);
+    float2 dy = float2(0.0, inv_size.y);
+    float2 g1 = float2(inv_size.x,inv_size.y);
+    float2 g2 = float2(-inv_size.x,inv_size.y);
+    // Cell origin converted back to UV. NOTE(review): the 1.0001 factor looks like a rounding nudge -- confirm.
+    float2 pC4 = floor(OGL2Pos) * 1.0001 * inv_size;    
+
+    // Reading the texels: a 3x3 neighbourhood around C4, laid out C0 C1 C2 / C3 C4 C5 / C6 C7 C8 with y increasing per row.
+    float3 C1 = texture2d(sBackBuffer, pC4 - dy, yx);
+    float3 C0 = texture2d(sBackBuffer, pC4 - g1, yx); 
+    float3 C2 = texture2d(sBackBuffer, pC4 - g2, yx);
+    float3 C3 = texture2d(sBackBuffer, pC4 - dx, yx);
+    float3 C4 = texture2d(sBackBuffer, pC4     , yx);
+    float3 C5 = texture2d(sBackBuffer, pC4 + dx, yx);
+    float3 C6 = texture2d(sBackBuffer, pC4 + g2, yx);
+    float3 C7 = texture2d(sBackBuffer, pC4 + dy, yx);
+    float3 C8 = texture2d(sBackBuffer, pC4 + g1, yx);
+    // Per-quadrant blends (ul/ur/dl/dr): each mixes the two diagonals of one 2x2 group around C4.
+    float3 ul, ur, dl, dr;
+    float m1, m2;
+    // m1/m2 are the summed channel contrasts of the two diagonals (+0.001 keeps the divisor non-zero); each pair is weighted by the other pair's contrast.
+    m1 = dot(abs(C0-C4),dt)+0.001;
+    m2 = dot(abs(C1-C3),dt)+0.001;
+    ul = (m2*(C0+C4)+m1*(C1+C3))/(m1+m2);  
+
+    m1 = dot(abs(C1-C5),dt)+0.001;
+    m2 = dot(abs(C2-C4),dt)+0.001;
+    ur = (m2*(C1+C5)+m1*(C2+C4))/(m1+m2);
+
+    m1 = dot(abs(C3-C7),dt)+0.001;
+    m2 = dot(abs(C6-C4),dt)+0.001;
+    dl = (m2*(C3+C7)+m1*(C6+C4))/(m1+m2);
+
+    m1 = dot(abs(C4-C8),dt)+0.001;
+    m2 = dot(abs(C5-C7),dt)+0.001;
+    dr = (m2*(C4+C8)+m1*(C5+C7))/(m1+m2);
+    // Bilinear mix of the four quadrant blends by fp; the 0.5 compensates for each blend being a weighted sum of texel pairs.
+    float3 c11 = 0.5*((dr*fp.x+dl*(1-fp.x))*fp.y+(ur*fp.x+ul*(1-fp.x))*(1-fp.y) );
+    // Alpha is fixed at 1.0.
+    return float4(c11, 1.0);
+}
+
+
+
+technique aa_shader_40 // single-pass technique
+{
+   pass
+   {
+       VertexShader = PostProcessVS; // not defined in this file; presumably provided by ReShade.fxh -- confirm
+       PixelShader  = PS_aa_shader_40;
+   }
+}
diff --git a/data/resources/shaders/reshade/Shaders/anti-aliasing/fxaa.fx b/data/resources/shaders/reshade/Shaders/anti-aliasing/fxaa.fx
@@ -0,0 +1,271 @@
+#include "ReShade.fxh"
+
+
+/**
+ * @license
+ * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
+ *
+ * TO  THE MAXIMUM  EXTENT PERMITTED  BY APPLICABLE  LAW, THIS SOFTWARE  IS PROVIDED
+ * *AS IS*  AND NVIDIA AND  ITS SUPPLIERS DISCLAIM  ALL WARRANTIES,  EITHER  EXPRESS
+ * OR IMPLIED, INCLUDING, BUT NOT LIMITED  TO, NONINFRINGEMENT,IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL  NVIDIA 
+ * OR ITS SUPPLIERS BE  LIABLE  FOR  ANY  DIRECT, SPECIAL,  INCIDENTAL,  INDIRECT,  OR  
+ * CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION,  DAMAGES FOR LOSS 
+ * OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY 
+ * OTHER PECUNIARY LOSS) ARISING OUT OF THE  USE OF OR INABILITY  TO USE THIS SOFTWARE, 
+ * EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+/*
+FXAA_PRESET - Choose compile-in knob preset 3-6 (defaults to 6 when not defined).
+------------------------------------------------------------------------------
+FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required 
+                      to apply algorithm.
+                      1.0/3.0  - too little
+                      1.0/4.0  - good start
+                      1.0/8.0  - applies to more edges
+                      1.0/16.0 - overkill
+------------------------------------------------------------------------------
+FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
+                          Perf optimization.
+                          1.0/32.0 - visible limit (smaller isn't visible)
+                          1.0/16.0 - good compromise
+                          1.0/12.0 - upper limit (seeing artifacts)
+------------------------------------------------------------------------------
+FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
+------------------------------------------------------------------------------
+FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
+                        1.0/4.0 - seems to be the best quality wise
+------------------------------------------------------------------------------
+FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
+                   1.0/2.0 - low removal
+                   1.0/3.0 - medium removal
+                   1.0/4.0 - default removal
+                   1.0/8.0 - high removal
+                   0.0 - complete removal
+------------------------------------------------------------------------------
+FXAA_SUBPIX_CAP - Ensures fine detail is not completely removed.
+                  This is important for the transition of sub-pixel detail,
+                  like fences and wires.
+                  3.0/4.0 - default (medium amount of filtering)
+                  7.0/8.0 - high amount of filtering
+                  1.0 - no capping of sub-pixel aliasing removal
+*/
+
+
+uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; // host-supplied; multiplied with ViewportSize in PS_FXAA
+uniform float2 ViewportSize < source = "viewportsize"; >; // host-supplied; PS_FXAA passes 1.0 / (ViewportSize*BufferToViewportRatio) as rcpFrame
+// Back-buffer sampler with clamped addressing and linear filtering.
+sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
+
+
+#ifndef FXAA_PRESET // fall back to preset 6 when nothing was defined beforehand
+    #define FXAA_PRESET 6
+#endif // NOTE(review): only presets 3..6 are handled below; any other value leaves the FXAA_* knobs undefined
+#if (FXAA_PRESET == 3) // 16 search steps, 1/16 dark-trim threshold
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/16.0)
+    #define FXAA_SEARCH_STEPS        16
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+#if (FXAA_PRESET == 4) // 24 search steps, 1/24 dark-trim threshold
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
+    #define FXAA_SEARCH_STEPS        24
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+#if (FXAA_PRESET == 5) // 32 search steps, 1/24 dark-trim threshold
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
+    #define FXAA_SEARCH_STEPS        32
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+#if (FXAA_PRESET == 6) // same as preset 5 except sub-pixel cap 1.0 and trim 0.0
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
+    #define FXAA_SEARCH_STEPS        32
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (1.0)
+    #define FXAA_SUBPIX_TRIM         (0.0)
+#endif
+// Rescales the trimmed sub-pixel blend factor: 1 / (1 - FXAA_SUBPIX_TRIM).
+#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
+
+// Approximate luma from red and green only (blue is dropped) without normalizing,
+// so the whole estimate is a single multiply-add. For channel values in [0, 1]
+// the result lies between 0.0 and 1.0 + 0.587/0.299 (about 2.963210702).
+float FxaaLuma(float3 rgb) {
+    const float greenGain = 0.587/0.299;
+    return rgb.y * greenGain + rgb.x;
+}
+
+float3 FxaaLerp3(float3 a, float3 b, float amountOfA) {
+    float3 weight = float3(amountOfA, amountOfA, amountOfA); // algebraically the result is b + amountOfA * (a - b)
+    return (-weight * b) + ((a * weight) + b);
+}
+float4 FxaaTexOff(sampler2D tex, float2 pos, int2 off, float2 rcpFrame) {
+    // Scale the integer offset by rcpFrame per axis, then sample tex at pos plus that shift.
+    float2 shift = float2(float(off.x), float(off.y)) * rcpFrame;
+    return tex2D(tex, pos + shift);
+}
+
+// FXAA edge filter for one pixel. pos: texture coordinate of the pixel (PS_FXAA passes vTexCoord);
+// tex: sampler to read from; rcpFrame: should equal {1.0/frameWidth, 1.0/frameHeight}, i.e. one texel in UV units.
+// Returns the unmodified centre colour on low-contrast pixels, otherwise the filtered colour.
+float3 FxaaPixelShader(float2 pos, sampler2D tex, float2 rcpFrame)
+{
+    float3 rgbN = FxaaTexOff(tex, pos.xy, int2( 0,-1), rcpFrame).xyz;
+    float3 rgbW = FxaaTexOff(tex, pos.xy, int2(-1, 0), rcpFrame).xyz;
+    float3 rgbM = FxaaTexOff(tex, pos.xy, int2( 0, 0), rcpFrame).xyz;
+    float3 rgbE = FxaaTexOff(tex, pos.xy, int2( 1, 0), rcpFrame).xyz;
+    float3 rgbS = FxaaTexOff(tex, pos.xy, int2( 0, 1), rcpFrame).xyz;
+    // Local contrast: spread between the darkest and brightest of the five lumas.
+    float lumaN = FxaaLuma(rgbN);
+    float lumaW = FxaaLuma(rgbW);
+    float lumaM = FxaaLuma(rgbM);
+    float lumaE = FxaaLuma(rgbE);
+    float lumaS = FxaaLuma(rgbS);
+    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+    // Early out: leave pixels alone when the contrast is below both the absolute and the relative threshold.
+    float range = rangeMax - rangeMin;
+    if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD))
+    {
+        return rgbM;
+    }
+    // rgbL accumulates the 3x3 box sum (divided by 9 further down); it is the sub-pixel blend target.
+    float3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+    // blendL: deviation of the centre luma from its four neighbours' mean, relative to range, then trimmed, rescaled and capped.
+    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
+    float rangeL = abs(lumaL - lumaM);
+    float blendL = max(0.0, (rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE; 
+    blendL = min(FXAA_SUBPIX_CAP, blendL);
+    // Diagonal neighbours complete the 3x3 neighbourhood.
+    float3 rgbNW = FxaaTexOff(tex, pos.xy, int2(-1,-1), rcpFrame).xyz;
+    float3 rgbNE = FxaaTexOff(tex, pos.xy, int2( 1,-1), rcpFrame).xyz;
+    float3 rgbSW = FxaaTexOff(tex, pos.xy, int2(-1, 1), rcpFrame).xyz;
+    float3 rgbSE = FxaaTexOff(tex, pos.xy, int2( 1, 1), rcpFrame).xyz;
+    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
+    rgbL *= (1.0/float3(9.0, 9.0, 9.0));
+
+    float lumaNW = FxaaLuma(rgbNW);
+    float lumaNE = FxaaLuma(rgbNE);
+    float lumaSW = FxaaLuma(rgbSW);
+    float lumaSE = FxaaLuma(rgbSE);
+    // Weighted second differences over rows (edgeVert) and columns (edgeHorz); the larger response picks the span orientation.
+    float edgeVert = 
+        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
+        abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
+        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
+    float edgeHorz = 
+        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
+        abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
+        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
+    // lengthSign: one texel perpendicular to the span, initially pointing toward N (horizontal span) or W (vertical span).
+    bool horzSpan = edgeHorz >= edgeVert;
+    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
+    // For a vertical span W/E take over the roles of N/S so the code below is orientation-agnostic.
+    if(!horzSpan)
+    {
+        lumaN = lumaW;
+        lumaS = lumaE;
+    }
+
+    float gradientN = abs(lumaN - lumaM);
+    float gradientS = abs(lumaS - lumaM);
+    lumaN = (lumaN + lumaM) * 0.5;
+    lumaS = (lumaS + lumaM) * 0.5;
+    // Keep the side with the steeper gradient in lumaN/gradientN; choosing the S side flips the offset direction.
+    if (gradientN < gradientS)
+    {
+        lumaN = lumaS;
+        // (the source repeated "lumaN = lumaS;" on this line; the dead duplicate store was dropped)
+        gradientN = gradientS;
+        lengthSign *= -1.0;
+    }
+    // Start the search half a texel toward the chosen neighbour, i.e. on the boundary of the pixel pair.
+    float2 posN;
+    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
+    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
+
+    gradientN *= FXAA_SEARCH_THRESHOLD;
+    // posN/posP walk along the span in opposite directions, one texel (offNP) per step.
+    float2 posP = posN;
+    float2 offNP = horzSpan ? float2(rcpFrame.x, 0.0) : float2(0.0, rcpFrame.y); 
+    float lumaEndN = lumaN;
+    float lumaEndP = lumaN;
+    bool doneN = false;
+    bool doneP = false;
+    posN += offNP * float2(-1.0, -1.0);
+    posP += offNP * float2( 1.0,  1.0);
+    // NOTE(review): tex2D is sampled inside a loop with a data-dependent exit; an explicit-LOD fetch may be safer -- confirm.
+    for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
+        if(!doneN)
+        {
+            lumaEndN = FxaaLuma(tex2D(tex, posN.xy).xyz);
+        }
+        if(!doneP)
+        {
+            lumaEndP = FxaaLuma(tex2D(tex, posP.xy).xyz);
+        }
+
+        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
+        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
+
+        if(doneN && doneP)
+        {
+            break;
+        }
+        if(!doneN)
+        {
+            posN -= offNP;
+        }
+        if(!doneP)
+        {
+            posP += offNP;
+        }
+    }
+    // Distances from the pixel to both span ends; lumaEndN is then overwritten with the luma of the nearer end.
+    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
+    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
+    bool directionN = dstN < dstP;
+    lumaEndN = directionN ? lumaEndN : lumaEndP;
+    // No shift when the centre and the nearer span end lie on the same side of the pair average.
+    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
+    {
+        lengthSign = 0.0;
+    }
+
+    // Sub-pixel shift: (0.5 - nearest distance / span length) of a texel, applied perpendicular to the span.
+    float spanLength = (dstP + dstN);
+    dstN = directionN ? dstN : dstP;
+    float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
+    float3 rgbF = tex2D(tex, float2(
+        pos.x + (horzSpan ? 0.0 : subPixelOffset),
+        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
+    return FxaaLerp3(rgbL, rgbF, blendL); // blendL = 0 keeps rgbF, larger values move toward the box average rgbL
+}
+
+float4 PS_FXAA(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
+{
+    float2 texelStep = 1.0 / (ViewportSize*BufferToViewportRatio); // rcpFrame argument of FxaaPixelShader
+    float3 filtered  = FxaaPixelShader(vTexCoord, sBackBuffer, texelStep);
+    return float4(filtered, 1.0); // alpha is fixed at 1.0
+}
+
+
+
+technique FXAA // single-pass technique
+{
+   pass
+   {
+   	VertexShader = PostProcessVS; // not defined in this file; presumably provided by ReShade.fxh -- confirm
+   	PixelShader  = PS_FXAA;
+   }
+}
diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx b/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx
@@ -183,6 +183,7 @@ uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_siz
 uniform float  UpscaleMultiplier < source = "upscale_multiplier"; >;
 uniform float2 ViewportSize < source = "viewportsize"; >;
 
+sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;}; // back-buffer sampler: BORDER addressing, point filtering; used by the TEX2D macro
 
 // Comment the next line to disable interpolation in linear gamma (and
 // gain speed).
@@ -199,9 +200,9 @@ uniform float2 ViewportSize < source = "viewportsize"; >;
 #define PI 3.141592653589
 
 #ifdef LINEAR_PROCESSING
-#       define TEX2D(c) pow(tex2D(ReShade::BackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
+#       define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
 #else
-#       define TEX2D(c) tex2D(ReShade::BackBuffer, (c))
+#       define TEX2D(c) tex2D(sBackBuffer, (c))
 #endif
 
 // aspect ratio