From 0b2511ea90cc24361ac74546c9ac1334ddadebd9 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Thu, 29 Mar 2018 23:46:02 +0100 Subject: [PATCH 1/3] D3D: use premultiplied alpha in the GPU texture scale filters. --- GPU/Directx9/PixelShaderGeneratorDX9.cpp | 117 +++++++++++++---------- 1 file changed, 67 insertions(+), 50 deletions(-) diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 3813de00072f..026b889753b8 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -64,11 +64,11 @@ static const char* sampler_gaussian = " pos.x = offset.x - i;\n" " for (j = -2.0; j< 2.0 ;j++){\n" " pos.y = offset.y - j;\n" - " c=tex_sample_direct(coord - pos * u_texSize.zw).rgba;\n" + " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" " tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);\n" " }\n" " }\n" - " return tempColor;\n" + " return postdivide_alpha(tempColor);\n" "};\n"; static const char* sampler_cosine = "#define sharpness 1.0\n" @@ -88,21 +88,21 @@ static const char* sampler_cosine = " pos.x = offset.x - i;\n" " for (j = -2.0; j< 2.0 ;j++){\n" " pos.y = offset.y - j;\n" - " c=tex_sample_direct(coord - pos * u_texSize.zw).rgba;\n" + " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" " tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);\n" " }\n" " }\n" - " return tempColor.rgba;\n" + " return postdivide_alpha(tempColor);\n" "};\n"; static const char* sampler_xbrz = - "float c_df(float4 c1, float4 c2) {\n" - " float4 df = abs(c1 - c2);\n" - " return df.r + df.g + df.b + df.a;\n" + "float c_df(float3 c1, float3 c2) {\n" + " float3 df = abs(c1 - c2);\n" + " return df.r + df.g + df.b;\n" "}\n" "\n" "static const float coef = 2.0;\n" "\n" - "static const float4 rgbw = float4(14.352, 28.176, 5.472, 15.0);\n" + "static const float3 rgbw = float3(14.352, 28.176, 5.472);\n" "static const float4 eq_threshold = float4(15.0, 
15.0, 15.0, 15.0);\n" "\n" "static const float4 delta = float4(1.0/4., 1.0/4., 1.0/4., 1.0/4.);\n" @@ -154,6 +154,7 @@ static const char* sampler_xbrz = "{\n" " float2 tc = coord * u_texSize.xy;\n" " float2 fp = frac(tc);\n" + " tc = floor(tc) + 0.5;\n" "\n" " float4 xyp_01_02_03 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., -2.));\n" " float4 xyp_06_07_08 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., -1.));\n" @@ -178,39 +179,39 @@ static const char* sampler_xbrz = " float4 fx_u; // inequations of straight lines.\n" "\n" "\n" - " float4 A1 = tex_sample_direct(xyp_01_02_03.xw);\n" - " float4 B1 = tex_sample_direct(xyp_01_02_03.yw);\n" - " float4 C1 = tex_sample_direct(xyp_01_02_03.zw);\n" - " float4 A = tex_sample_direct(xyp_06_07_08.xw);\n" - " float4 B = tex_sample_direct(xyp_06_07_08.yw);\n" - " float4 C = tex_sample_direct(xyp_06_07_08.zw);\n" - " float4 D = tex_sample_direct(xyp_11_12_13.xw);\n" - " float4 E = tex_sample_direct(xyp_11_12_13.yw);\n" - " float4 F = tex_sample_direct(xyp_11_12_13.zw);\n" - " float4 G = tex_sample_direct(xyp_16_17_18.xw);\n" - " float4 H = tex_sample_direct(xyp_16_17_18.yw);\n" - " float4 I = tex_sample_direct(xyp_16_17_18.zw);\n" - " float4 G5 = tex_sample_direct(xyp_21_22_23.xw);\n" - " float4 H5 = tex_sample_direct(xyp_21_22_23.yw);\n" - " float4 I5 = tex_sample_direct(xyp_21_22_23.zw);\n" - " float4 A0 = tex_sample_direct(xyp_05_10_15.xy);\n" - " float4 D0 = tex_sample_direct(xyp_05_10_15.xz);\n" - " float4 G0 = tex_sample_direct(xyp_05_10_15.xw);\n" - " float4 C4 = tex_sample_direct(xyp_09_14_09.xy);\n" - " float4 F4 = tex_sample_direct(xyp_09_14_09.xz);\n" - " float4 I4 = tex_sample_direct(xyp_09_14_09.xw);\n" - "\n" - " float4 b = float4(dot(B ,rgbw), dot(D ,rgbw), dot(H ,rgbw), dot(F ,rgbw));\n" - " float4 c = float4(dot(C ,rgbw), dot(A ,rgbw), dot(G ,rgbw), dot(I ,rgbw));\n" + " float4 A1 = premultiply_alpha(tex_sample_direct(xyp_01_02_03.xw));\n" + " float4 B1 = 
premultiply_alpha(tex_sample_direct(xyp_01_02_03.yw));\n" + " float4 C1 = premultiply_alpha(tex_sample_direct(xyp_01_02_03.zw));\n" + " float4 A = premultiply_alpha(tex_sample_direct(xyp_06_07_08.xw));\n" + " float4 B = premultiply_alpha(tex_sample_direct(xyp_06_07_08.yw));\n" + " float4 C = premultiply_alpha(tex_sample_direct(xyp_06_07_08.zw));\n" + " float4 D = premultiply_alpha(tex_sample_direct(xyp_11_12_13.xw));\n" + " float4 E = premultiply_alpha(tex_sample_direct(xyp_11_12_13.yw));\n" + " float4 F = premultiply_alpha(tex_sample_direct(xyp_11_12_13.zw));\n" + " float4 G = premultiply_alpha(tex_sample_direct(xyp_16_17_18.xw));\n" + " float4 H = premultiply_alpha(tex_sample_direct(xyp_16_17_18.yw));\n" + " float4 I = premultiply_alpha(tex_sample_direct(xyp_16_17_18.zw));\n" + " float4 G5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.xw));\n" + " float4 H5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.yw));\n" + " float4 I5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.zw));\n" + " float4 A0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xy));\n" + " float4 D0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xz));\n" + " float4 G0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xw));\n" + " float4 C4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xy));\n" + " float4 F4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xz));\n" + " float4 I4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xw));\n" + "\n" + " float4 b = float4(dot(B.rgb ,rgbw), dot(D.rgb ,rgbw), dot(H.rgb ,rgbw), dot(F.rgb ,rgbw));\n" + " float4 c = float4(dot(C.rgb ,rgbw), dot(A.rgb ,rgbw), dot(G.rgb ,rgbw), dot(I.rgb ,rgbw));\n" " float4 d = b.yzwx;\n" - " float4 e = dot(E,rgbw).xxxx;\n" + " float4 e = dot(E.rgb,rgbw).xxxx;\n" " float4 f = b.wxyz;\n" " float4 g = c.zwxy;\n" " float4 h = b.zwxy;\n" " float4 i = c.wxyz;\n" - " float4 i4 = float4(dot(I4,rgbw), dot(C1,rgbw), dot(A0,rgbw), dot(G5,rgbw));\n" - " float4 i5 = float4(dot(I5,rgbw), dot(C4,rgbw), 
dot(A1,rgbw), dot(G0,rgbw));\n" - " float4 h5 = float4(dot(H5,rgbw), dot(F4,rgbw), dot(B1,rgbw), dot(D0,rgbw));\n" + " float4 i4 = float4(dot(I4.rgb,rgbw), dot(C1.rgb,rgbw), dot(A0.rgb,rgbw), dot(G5.rgb,rgbw));\n" + " float4 i5 = float4(dot(I5.rgb,rgbw), dot(C4.rgb,rgbw), dot(A1.rgb,rgbw), dot(G0.rgb,rgbw));\n" + " float4 h5 = float4(dot(H5.rgb,rgbw), dot(F4.rgb,rgbw), dot(B1.rgb,rgbw), dot(D0.rgb,rgbw));\n" " float4 f4 = h5.yzwx;\n" "\n" " // These inequations define the line below which interpolation occurs.\n" @@ -218,9 +219,14 @@ static const char* sampler_xbrz = " fx_l = (Ax*fp.y+Bx*fp.x);\n" " fx_u = (Ay*fp.y+By*fp.x);\n" "\n" - " irlv1 = irlv0 = diff(e,f) * diff(e,h);\n" + " irlv0 = diff(e,f) * diff(e,h);\n" "\n" - " irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );\n" +// " irlv1 = irlv0;\n" +// " irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );\n" +// " float4 c1 = i4.yzwx;\n" +// " float4 g0 = i5.wxyz;\n" +// " irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));\n" + " irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );\n" "\n" " irlv2l = diff(e,g) * diff(d,g);\n" " irlv2u = diff(e,c) * diff(b,c);\n" @@ -246,6 +252,7 @@ static const char* sampler_xbrz = " px = step(df(e,f), df(e,h));\n" "\n" " float4 maximos = max(max(fx30, fx60), max(fx45, fx45i));\n" +// " float4 maximos = max(max(fx30, fx60), fx45);" "\n" " float4 res1 = E;\n" " res1 = lerp(res1, lerp(H, F, px.x), maximos.x);\n" @@ -255,12 +262,13 @@ static const char* sampler_xbrz = " res2 = lerp(res2, lerp(F, B, px.y), maximos.y);\n" " res2 = lerp(res2, lerp(D, H, px.w), maximos.w);\n" "\n" - " return lerp(res1, res2, 
step(c_df(E, res1), c_df(E, res2)));\n" + " float4 res = lerp(res1, res2, step(c_df(E.rgb, res1.rgb), c_df(E.rgb, res2.rgb)));\n" + " return postdivide_alpha(res);\n" "}\n"; static const char* sampler_sabr = "float c_df(float4 c1, float4 c2) {\n" - " float4 df = abs(c1 - c2);\n" - " return df.r + df.g + df.b + df.a;\n" + " float3 df = abs(c1.rgb - c2.rgb);\n" + " return df.r + df.g + df.b;\n" "}\n" "static const float4 Ai = float4( 1.0, -1.0, -1.0, 1.0);\n" "static const float4 B45 = float4( 1.0, 1.0, -1.0, -1.0);\n" @@ -270,7 +278,7 @@ static const char* sampler_sabr = "static const float3 lum = float3(0.21, 0.72, 0.07);\n" "\n" "float lum_to(float4 v) {\n" - " return dot(lum, v.rgb) * v.a;\n" + " return dot(lum, v.rgb);\n" "}\n" "float4 lum_to(float4 v0, float4 v1, float4 v2, float4 v3) {\n" " return float4(lum_to(v0), lum_to(v1), lum_to(v2), lum_to(v3));\n" @@ -290,11 +298,11 @@ static const char* sampler_sabr = " +-----+-----+-----+\n" "*/\n" "// Store mask values\n" - " float4 P07 = tex_sample_direct(coord + u_texSize.zw * float2( 0.0, -1.0));\n" - " float4 P11 = tex_sample_direct(coord + u_texSize.zw * float2(-1.0, 0.0));\n" - " float4 P12 = tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 0.0));\n" - " float4 P13 = tex_sample_direct(coord + u_texSize.zw * float2( 1.0, 0.0));\n" - " float4 P17 = tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 1.0));\n" + " float4 P07 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, -1.0)));\n" + " float4 P11 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2(-1.0, 0.0)));\n" + " float4 P12 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 0.0)));\n" + " float4 P13 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 1.0, 0.0)));\n" + " float4 P17 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 1.0)));\n" "\n" " // Store luminance values of each point\n" " float4 p7 = lum_to(P07, P11, P17, P13);\n" @@ -319,7 
+327,7 @@ static const char* sampler_sabr = " res2 = lerp(res2, lerp(P13, P17, px.x), ma45.x);\n" "\n" " float4 res = lerp(res1, res2, step(c_df(P12, res1), c_df(P12, res2)));\n" - " return res;\n" + " return postdivide_alpha(res);\n" "}\n"; static const char* sampler_hybrid = sampler_xbrz; @@ -357,11 +365,11 @@ static const char* sampler_bicubic = " pos.x = offset.x - i;\n" " for (j = -2.0; j < 3.0 ;j++){\n" " pos.y = offset.y - j;\n" - " c=tex_sample_direct(coord - pos * u_texSize.zw).rgba;\n" + " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" " tempColor+=c*mitchell(pos);\n" " }\n" " }\n" - " return tempColor;\n" + " return postdivide_alpha(tempColor);\n" "};\n"; // Missing: Z depth range @@ -484,6 +492,15 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag WRITE(p, "};\n"); if (!isModeClear && doTexture) { + if (doTextureAlpha) { + // TODO: check why the [0.0,1.0] clamp is necessary here + WRITE(p, "float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); }"); + WRITE(p, "float4 postdivide_alpha(float4 c) { return c.a < 0.001f? 0.0f : float4(c.rgb / c.a, c.a); }\n"); + } else { + WRITE(p, "#define premultiply_alpha(c) (c)\n"); + WRITE(p, "#define postdivide_alpha(c) (c)\n"); + } + WRITE(p, "float4 tex_sample_direct(float2 coord) {\n"); if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { WRITE(p, " return tex.Sample(samp, coord)%s;\n", bgraTexture ? ".bgra" : ""); From 45a08097688ffd94eba2f0aff8922c2347b14ef4 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Fri, 30 Mar 2018 19:21:57 +0100 Subject: [PATCH 2/3] D3D: load gpu scaler code from the assets folder at runtime. also add xbrz code in addition to xbr. 
--- CMakeLists.txt | 3 +- GPU/Common/TextureScalerCommon.cpp | 5 +- GPU/Common/TextureScalerCommon.h | 2 +- GPU/D3D11/FragmentShaderGeneratorD3D11.cpp | 4 +- GPU/D3D11/FragmentShaderGeneratorD3D11.h | 2 +- GPU/D3D11/ShaderManagerD3D11.cpp | 32 +- GPU/D3D11/ShaderManagerD3D11.h | 2 + GPU/Directx9/PixelShaderGeneratorDX9.cpp | 376 +-------------------- GPU/Directx9/PixelShaderGeneratorDX9.h | 2 +- GPU/Directx9/ShaderManagerDX9.cpp | 2 +- UI/GameSettingsScreen.cpp | 2 +- Windows/MainWindowMenu.cpp | 2 + Windows/ppsspp.rc | 1 + Windows/resource.h | 7 +- assets/scalers/bicubic.hlsl | 49 +++ assets/scalers/cosine.hlsl | 24 ++ assets/scalers/gaussian.hlsl | 33 ++ assets/scalers/hybrid.hlsl | 4 + assets/scalers/hybrid_bicubic.hlsl | 4 + assets/scalers/sabr.hlsl | 64 ++++ assets/scalers/xbr.hlsl | 210 ++++++++++++ assets/scalers/xbrz.hlsl | 335 ++++++++++++++++++ libretro/libretro.cpp | 2 +- 23 files changed, 779 insertions(+), 388 deletions(-) create mode 100644 assets/scalers/bicubic.hlsl create mode 100644 assets/scalers/cosine.hlsl create mode 100644 assets/scalers/gaussian.hlsl create mode 100644 assets/scalers/hybrid.hlsl create mode 100644 assets/scalers/hybrid_bicubic.hlsl create mode 100644 assets/scalers/sabr.hlsl create mode 100644 assets/scalers/xbr.hlsl create mode 100644 assets/scalers/xbrz.hlsl diff --git a/CMakeLists.txt b/CMakeLists.txt index ff1f78a1d287..39b40360eb55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1851,7 +1851,8 @@ endif() set(NativeAssets android/assets/ui_atlas.zim assets/lang - assets/shaders + assets/shaders + assets/scalers assets/Roboto-Condensed.ttf assets/7z.png assets/compat.ini diff --git a/GPU/Common/TextureScalerCommon.cpp b/GPU/Common/TextureScalerCommon.cpp index bf4d9f14e82f..d01dd3e605a2 100644 --- a/GPU/Common/TextureScalerCommon.cpp +++ b/GPU/Common/TextureScalerCommon.cpp @@ -555,8 +555,9 @@ bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, u32 &dstFmt, int & // scale switch (g_Config.iTexScalingType) { 
+ case XBR: case SABR: - // no cpu implementation for sabr, fall back to xbrz + // no cpu implementation, fall back to xbrz case XBRZ: ScaleXBRZ(factor, inputBuf, outputBuf, width, height); break; @@ -565,7 +566,7 @@ bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, u32 &dstFmt, int & break; case GAUSSIAN: case COSINE: - // no cpu implementation for those, fall back to bicubic + // no cpu implementation, fall back to bicubic case BICUBIC: ScaleBicubicMitchell(factor, inputBuf, outputBuf, width, height); break; diff --git a/GPU/Common/TextureScalerCommon.h b/GPU/Common/TextureScalerCommon.h index dced8353d03d..8711f77b7cba 100644 --- a/GPU/Common/TextureScalerCommon.h +++ b/GPU/Common/TextureScalerCommon.h @@ -31,7 +31,7 @@ class TextureScalerCommon { bool Scale(u32 *&data, u32 &dstfmt, int &width, int &height, int factor); bool ScaleInto(u32 *out, u32 *src, u32 &dstfmt, int &width, int &height, int factor); - enum { XBRZ = 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3, SABR = 4, GAUSSIAN = 5, COSINE = 6 }; + enum { XBRZ = 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3, XBR = 4, SABR = 5, GAUSSIAN = 6, COSINE = 7 }; protected: virtual void ConvertTo8888(u32 format, u32 *source, u32 *&dest, int width, int height) = 0; diff --git a/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp b/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp index 07d37c8d5ce1..61a23e26df8f 100644 --- a/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp +++ b/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp @@ -19,6 +19,6 @@ #include "GPU/D3D11/FragmentShaderGeneratorD3D11.h" #include "GPU/Directx9/PixelShaderGeneratorDX9.h" -void GenerateFragmentShaderD3D11(const FShaderID &id, char *buffer, ShaderLanguage lang) { - DX9::GenerateFragmentShaderHLSL(id, buffer, lang); +void GenerateFragmentShaderD3D11(const FShaderID &id, char *buffer, char* scalerCode, ShaderLanguage lang) { + DX9::GenerateFragmentShaderHLSL(id, buffer, scalerCode, lang); } diff --git a/GPU/D3D11/FragmentShaderGeneratorD3D11.h 
b/GPU/D3D11/FragmentShaderGeneratorD3D11.h index cf94a63200bc..927682e799ca 100644 --- a/GPU/D3D11/FragmentShaderGeneratorD3D11.h +++ b/GPU/D3D11/FragmentShaderGeneratorD3D11.h @@ -19,4 +19,4 @@ #include "GPU/Common/ShaderId.h" -void GenerateFragmentShaderD3D11(const FShaderID &id, char *buffer, ShaderLanguage lang); +void GenerateFragmentShaderD3D11(const FShaderID &id, char *buffer, char *scalerCode, ShaderLanguage lang); diff --git a/GPU/D3D11/ShaderManagerD3D11.cpp b/GPU/D3D11/ShaderManagerD3D11.cpp index 7e79d5722450..2ce967bf2065 100644 --- a/GPU/D3D11/ShaderManagerD3D11.cpp +++ b/GPU/D3D11/ShaderManagerD3D11.cpp @@ -26,6 +26,7 @@ #include "math/lin/matrix4x4.h" #include "math/math_util.h" #include "math/dataconv.h" +#include "file/vfs.h" #include "util/text/utf8.h" #include "Common/Common.h" #include "Core/Config.h" @@ -89,8 +90,8 @@ std::string D3D11VertexShader::GetShaderString(DebugShaderStringType type) const } ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context, D3D_FEATURE_LEVEL featureLevel) - : device_(device), context_(context), featureLevel_(featureLevel), lastVShader_(nullptr), lastFShader_(nullptr) { - codeBuffer_ = new char[16384]; + : device_(device), context_(context), featureLevel_(featureLevel), lastVShader_(nullptr), lastFShader_(nullptr), scalerCode_(nullptr) { + codeBuffer_ = new char[16384 * 2]; memset(&ub_base, 0, sizeof(ub_base)); memset(&ub_lights, 0, sizeof(ub_lights)); @@ -101,6 +102,7 @@ ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_base)); desc.ByteWidth = sizeof(ub_lights); ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_lights)); + needScalerCode = g_Config.bRealtimeTexScaling && g_Config.iTexScalingLevel != 1; } ShaderManagerD3D11::~ShaderManagerD3D11() { @@ -128,6 +130,9 @@ void ShaderManagerD3D11::ClearShaders() { Clear(); DirtyLastShader(); gstate_c.Dirty(DIRTY_ALL_UNIFORMS); + 
delete[] (uint8_t *)scalerCode_; // buffer comes from VFSReadFile(), which allocates with new[] (see file/vfs.h) - must not be free()'d + scalerCode_ = nullptr; + needScalerCode = g_Config.bRealtimeTexScaling && g_Config.iTexScalingLevel != 1; } void ShaderManagerD3D11::DirtyLastShader() { @@ -208,7 +213,28 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader ** D3D11FragmentShader *fs; if (fsIter == fsCache_.end()) { // Fragment shader not in cache. Let's compile it. - GenerateFragmentShaderD3D11(FSID, codeBuffer_, featureLevel_ <= D3D_FEATURE_LEVEL_9_3 ? HLSL_D3D11_LEVEL9 : HLSL_D3D11); + if(needScalerCode && !scalerCode_) { + static const char* filenames[] = { + "scalers/xbrz.hlsl", + "scalers/hybrid.hlsl", + "scalers/bicubic.hlsl", + "scalers/hybrid_bicubic.hlsl", + "scalers/xbr.hlsl", + "scalers/sabr.hlsl", + "scalers/gaussian.hlsl", + "scalers/cosine.hlsl", + }; + if ((g_Config.iTexScalingType < 0) || (g_Config.iTexScalingType >= sizeof(filenames) / sizeof(*filenames))) { + ERROR_LOG(G3D, "Unknown scaling type: %i", g_Config.iTexScalingType); + } else { + size_t sz; + scalerCode_ = (char *)VFSReadFile(filenames[g_Config.iTexScalingType], &sz); + if(!scalerCode_) + ERROR_LOG(G3D, "Scaler not found: %s", filenames[g_Config.iTexScalingType]); + } + needScalerCode = false; + } + GenerateFragmentShaderD3D11(FSID, codeBuffer_, scalerCode_, featureLevel_ <= D3D_FEATURE_LEVEL_9_3 ? HLSL_D3D11_LEVEL9 : HLSL_D3D11); fs = new D3D11FragmentShader(device_, featureLevel_, FSID, codeBuffer_, useHWTransform); fsCache_[FSID] = fs; } else { diff --git a/GPU/D3D11/ShaderManagerD3D11.h b/GPU/D3D11/ShaderManagerD3D11.h index cc0da972bd39..656b19900e91 100644 --- a/GPU/D3D11/ShaderManagerD3D11.h +++ b/GPU/D3D11/ShaderManagerD3D11.h @@ -116,6 +116,8 @@ class ShaderManagerD3D11 : public ShaderManagerCommon { VSCache vsCache_; char *codeBuffer_; + char *scalerCode_; + bool needScalerCode; // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. 
UB_VS_FS_Base ub_base; diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 026b889753b8..6207ab22d850 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -32,349 +32,9 @@ namespace DX9 { -static const char* sampler_default = - "float4 tex_sample(float2 coord) {\n" - " return tex_sample_direct(coord);\n" - "};\n"; - -static const char* sampler_gaussian = - "#define sharpness 1.0\n" - "#define pi 3.14159265358\n" - "#define normalGauss(x) ((exp(-(x)*(x)*0.5))/sqrt(2.0*pi))\n" - "#define normalGauss2(x) (normalGauss(x - 0.5) - 0.5)\n" - "float normalGaussIntegral(float x)\n" - "{\n" - " float a1 = 0.4361836;\n" - " float a2 = -0.1201676;\n" - " float a3 = 0.9372980;\n" - " float p = 0.3326700;\n" - " float t = 1.0 / (1.0 + p*abs(x));\n" - "\n" - " return (0.5-normalGauss(x) * (t*(a1 + t*(a2 + a3*t))))*sign(x);\n" - "}\n" - "#define KERNEL(x,b) (normalGaussIntegral(sqrt(2*pi)*b*(x - 0.5)) - normalGaussIntegral(sqrt(2*pi)*b*(x + 0.5)))\n" - "\n" - "float4 tex_sample(float2 coord) {\n" - " float2 offset = frac(coord * u_texSize.xy) - 0.5;\n" - " float4 tempColor = 0.0;\n" - " float4 c;\n" - " float i,j;\n" - " float2 pos;\n" - " for (i = -2.0; i < 2.0; i++){\n" - " pos.x = offset.x - i;\n" - " for (j = -2.0; j< 2.0 ;j++){\n" - " pos.y = offset.y - j;\n" - " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" - " tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);\n" - " }\n" - " }\n" - " return postdivide_alpha(tempColor);\n" - "};\n"; -static const char* sampler_cosine = - "#define sharpness 1.0\n" - "#define pi 3.14159265358\n" - "#define a(x) abs(x)\n" - "#define d(x,b) (pi*b*min(a(x)+0.5,1.0/b))\n" - "#define e(x,b) (pi*b*min(max(a(x)-0.5,-1.0/b),1.0/b))\n" - "#define KERNEL(x,b) ((d(x,b)+sin(d(x,b))-e(x,b)-sin(e(x,b)))/(2.0*pi))\n" - "\n" - "float4 tex_sample(float2 coord) {\n" - " float2 offset = frac(coord * u_texSize.xy) - 0.5;\n" - 
" float4 tempColor = 0.0;\n" - " float4 c;\n" - " float i,j;\n" - " float2 pos;\n" - " for (i = -2.0; i < 2.0; i++){\n" - " pos.x = offset.x - i;\n" - " for (j = -2.0; j< 2.0 ;j++){\n" - " pos.y = offset.y - j;\n" - " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" - " tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);\n" - " }\n" - " }\n" - " return postdivide_alpha(tempColor);\n" - "};\n"; -static const char* sampler_xbrz = - "float c_df(float3 c1, float3 c2) {\n" - " float3 df = abs(c1 - c2);\n" - " return df.r + df.g + df.b;\n" - "}\n" - "\n" - "static const float coef = 2.0;\n" - "\n" - "static const float3 rgbw = float3(14.352, 28.176, 5.472);\n" - "static const float4 eq_threshold = float4(15.0, 15.0, 15.0, 15.0);\n" - "\n" - "static const float4 delta = float4(1.0/4., 1.0/4., 1.0/4., 1.0/4.);\n" - "static const float4 delta_l = float4(0.5/4., 1.0/4., 0.5/4., 1.0/4.);\n" - "static const float4 delta_u = delta_l.yxwz;\n" - "\n" - "static const float4 Ao = float4( 1.0, -1.0, -1.0, 1.0 );\n" - "static const float4 Bo = float4( 1.0, 1.0, -1.0,-1.0 );\n" - "static const float4 Co = float4( 1.5, 0.5, -0.5, 0.5 );\n" - "static const float4 Ax = float4( 1.0, -1.0, -1.0, 1.0 );\n" - "static const float4 Bx = float4( 0.5, 2.0, -0.5,-2.0 );\n" - "static const float4 Cx = float4( 1.0, 1.0, -0.5, 0.0 );\n" - "static const float4 Ay = float4( 1.0, -1.0, -1.0, 1.0 );\n" - "static const float4 By = float4( 2.0, 0.5, -2.0,-0.5 );\n" - "static const float4 Cy = float4( 2.0, 0.0, -1.0, 0.5 );\n" - "static const float4 Ci = float4(0.25, 0.25, 0.25, 0.25);\n" - "\n" - "// Difference between vector components.\n" - "float4 df(float4 A, float4 B)\n" - "{\n" - " return float4(abs(A-B));\n" - "}\n" - "\n" - "// Compare two vectors and return their components are different.\n" - "float4 diff(float4 A, float4 B)\n" - "{\n" - " return step(0.001, df(A, B));\n" - "}\n" - "\n" - "// Determine if two vector components are equal based on a threshold.\n" - 
"float4 eq(float4 A, float4 B)\n" - "{\n" - " return step(df(A, B), 15.);\n" - "}\n" - "\n" - "// Determine if two vector components are NOT equal based on a threshold.\n" - "float4 neq(float4 A, float4 B)\n" - "{\n" - " return step(15., df(A, B));\n" - "}\n" - "\n" - "// Weighted distance.\n" - "float4 wd(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h)\n" - "{\n" - " return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));\n" - "}\n" - "\n" - "float4 tex_sample(float2 coord)\n" - "{\n" - " float2 tc = coord * u_texSize.xy;\n" - " float2 fp = frac(tc);\n" - " tc = floor(tc) + 0.5;\n" - "\n" - " float4 xyp_01_02_03 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., -2.));\n" - " float4 xyp_06_07_08 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., -1.));\n" - " float4 xyp_11_12_13 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., 0.));\n" - " float4 xyp_16_17_18 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., 1.));\n" - " float4 xyp_21_22_23 = u_texSize.zzzw * (tc.xxxy + float4(-1., 0., 1., 2.));\n" - " float4 xyp_05_10_15 = u_texSize.zwww * (tc.xyyy + float4(-2., -1., 0., 1.));\n" - " float4 xyp_09_14_09 = u_texSize.zwww * (tc.xyyy + float4( 2., -1., 0., 1.));\n" - "\n" - " float4 edri;\n" - " float4 edr;\n" - " float4 edr_l;\n" - " float4 edr_u;\n" - " float4 px; // px = pixel, edr = edge detection rule\n" - " float4 irlv0;\n" - " float4 irlv1;\n" - " float4 irlv2l;\n" - " float4 irlv2u;\n" - " float4 block_3d;\n" - " float4 fx;\n" - " float4 fx_l;\n" - " float4 fx_u; // inequations of straight lines.\n" - "\n" - "\n" - " float4 A1 = premultiply_alpha(tex_sample_direct(xyp_01_02_03.xw));\n" - " float4 B1 = premultiply_alpha(tex_sample_direct(xyp_01_02_03.yw));\n" - " float4 C1 = premultiply_alpha(tex_sample_direct(xyp_01_02_03.zw));\n" - " float4 A = premultiply_alpha(tex_sample_direct(xyp_06_07_08.xw));\n" - " float4 B = premultiply_alpha(tex_sample_direct(xyp_06_07_08.yw));\n" - " float4 C = 
premultiply_alpha(tex_sample_direct(xyp_06_07_08.zw));\n" - " float4 D = premultiply_alpha(tex_sample_direct(xyp_11_12_13.xw));\n" - " float4 E = premultiply_alpha(tex_sample_direct(xyp_11_12_13.yw));\n" - " float4 F = premultiply_alpha(tex_sample_direct(xyp_11_12_13.zw));\n" - " float4 G = premultiply_alpha(tex_sample_direct(xyp_16_17_18.xw));\n" - " float4 H = premultiply_alpha(tex_sample_direct(xyp_16_17_18.yw));\n" - " float4 I = premultiply_alpha(tex_sample_direct(xyp_16_17_18.zw));\n" - " float4 G5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.xw));\n" - " float4 H5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.yw));\n" - " float4 I5 = premultiply_alpha(tex_sample_direct(xyp_21_22_23.zw));\n" - " float4 A0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xy));\n" - " float4 D0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xz));\n" - " float4 G0 = premultiply_alpha(tex_sample_direct(xyp_05_10_15.xw));\n" - " float4 C4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xy));\n" - " float4 F4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xz));\n" - " float4 I4 = premultiply_alpha(tex_sample_direct(xyp_09_14_09.xw));\n" - "\n" - " float4 b = float4(dot(B.rgb ,rgbw), dot(D.rgb ,rgbw), dot(H.rgb ,rgbw), dot(F.rgb ,rgbw));\n" - " float4 c = float4(dot(C.rgb ,rgbw), dot(A.rgb ,rgbw), dot(G.rgb ,rgbw), dot(I.rgb ,rgbw));\n" - " float4 d = b.yzwx;\n" - " float4 e = dot(E.rgb,rgbw).xxxx;\n" - " float4 f = b.wxyz;\n" - " float4 g = c.zwxy;\n" - " float4 h = b.zwxy;\n" - " float4 i = c.wxyz;\n" - " float4 i4 = float4(dot(I4.rgb,rgbw), dot(C1.rgb,rgbw), dot(A0.rgb,rgbw), dot(G5.rgb,rgbw));\n" - " float4 i5 = float4(dot(I5.rgb,rgbw), dot(C4.rgb,rgbw), dot(A1.rgb,rgbw), dot(G0.rgb,rgbw));\n" - " float4 h5 = float4(dot(H5.rgb,rgbw), dot(F4.rgb,rgbw), dot(B1.rgb,rgbw), dot(D0.rgb,rgbw));\n" - " float4 f4 = h5.yzwx;\n" - "\n" - " // These inequations define the line below which interpolation occurs.\n" - " fx = (Ao*fp.y+Bo*fp.x);\n" - " fx_l = 
(Ax*fp.y+Bx*fp.x);\n" - " fx_u = (Ay*fp.y+By*fp.x);\n" - "\n" - " irlv0 = diff(e,f) * diff(e,h);\n" - "\n" -// " irlv1 = irlv0;\n" -// " irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );\n" -// " float4 c1 = i4.yzwx;\n" -// " float4 g0 = i5.wxyz;\n" -// " irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));\n" - " irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );\n" - "\n" - " irlv2l = diff(e,g) * diff(d,g);\n" - " irlv2u = diff(e,c) * diff(b,c);\n" - "\n" - " float4 fx45i = clamp((fx + delta -Co - Ci)/(2.0*delta ), 0.0, 1.0);\n" - " float4 fx45 = clamp((fx + delta -Co )/(2.0*delta ), 0.0, 1.0);\n" - " float4 fx30 = clamp((fx_l + delta_l -Cx )/(2.0*delta_l), 0.0, 1.0);\n" - " float4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0);\n" - "\n" - " float4 wd1 = wd( e, c, g, i, h5, f4, h, f);\n" - " float4 wd2 = wd( h, d, i5, f, i4, b, e, i);\n" - "\n" - " edri = step(wd1, wd2) * irlv0;\n" - " edr = step(wd1 + float4(0.1, 0.1, 0.1, 0.1), wd2) * step(float4(0.5, 0.5, 0.5, 0.5), irlv1);\n" - " edr_l = step( 2.*df(f,g), df(h,c) ) * irlv2l * edr;\n" - " edr_u = step( 2.*df(h,c), df(f,g) ) * irlv2u * edr;\n" - "\n" - " fx45 = edr * fx45;\n" - " fx30 = edr_l * fx30;\n" - " fx60 = edr_u * fx60;\n" - " fx45i = edri * fx45i;\n" - "\n" - " px = step(df(e,f), df(e,h));\n" - "\n" - " float4 maximos = max(max(fx30, fx60), max(fx45, fx45i));\n" -// " float4 maximos = max(max(fx30, fx60), fx45);" - "\n" - " float4 res1 = E;\n" - " res1 = lerp(res1, lerp(H, F, px.x), maximos.x);\n" - " res1 = lerp(res1, lerp(B, D, px.z), maximos.z);\n" - "\n" - " float4 res2 = E;\n" - " res2 = lerp(res2, lerp(F, B, px.y), maximos.y);\n" - " res2 = lerp(res2, lerp(D, H, px.w), maximos.w);\n" - "\n" - " float4 
res = lerp(res1, res2, step(c_df(E.rgb, res1.rgb), c_df(E.rgb, res2.rgb)));\n" - " return postdivide_alpha(res);\n" - "}\n"; -static const char* sampler_sabr = - "float c_df(float4 c1, float4 c2) {\n" - " float3 df = abs(c1.rgb - c2.rgb);\n" - " return df.r + df.g + df.b;\n" - "}\n" - "static const float4 Ai = float4( 1.0, -1.0, -1.0, 1.0);\n" - "static const float4 B45 = float4( 1.0, 1.0, -1.0, -1.0);\n" - "static const float4 C45 = float4( 1.5, 0.5, -0.5, 0.5);\n" - "static const float4 M45 = float4(0.4, 0.4, 0.4, 0.4);\n" - "static const float4 M30 = float4(0.2, 0.4, 0.2, 0.4);\n" - "static const float3 lum = float3(0.21, 0.72, 0.07);\n" - "\n" - "float lum_to(float4 v) {\n" - " return dot(lum, v.rgb);\n" - "}\n" - "float4 lum_to(float4 v0, float4 v1, float4 v2, float4 v3) {\n" - " return float4(lum_to(v0), lum_to(v1), lum_to(v2), lum_to(v3));\n" - "}\n" - "\n" - "\n" - "float4 tex_sample(float2 coord)\n" - "{\n" - "/*\n" - " Mask for algorithm\n" - " +-----+-----+-----+\n" - " | | 7 | |\n" - " +-----+-----+-----+\n" - " | 11 | 12 | 13 |\n" - " +-----+-----+-----+\n" - " | | 17 | |\n" - " +-----+-----+-----+\n" - "*/\n" - "// Store mask values\n" - " float4 P07 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, -1.0)));\n" - " float4 P11 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2(-1.0, 0.0)));\n" - " float4 P12 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 0.0)));\n" - " float4 P13 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 1.0, 0.0)));\n" - " float4 P17 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 1.0)));\n" - "\n" - " // Store luminance values of each point\n" - " float4 p7 = lum_to(P07, P11, P17, P13);\n" - " float4 p12 = lum_to(P12);\n" - " float4 p13 = p7.wxyz; // P13, P7, P11, P17\n" - " float4 p17 = p7.zwxy; // P11, P17, P13, P7\n" - "\n" - " float2 fp = frac(coord * u_texSize.xy);\n" - " float4 ma45 = smoothstep(C45 - M45, 
C45 + M45, Ai * fp.y + B45 * fp.x);\n" - " float4 px = step(abs(p12 - p17), abs(p12 - p13));\n" - "\n" - " float4 res1 = P12;\n" - " res1 = lerp(res1, lerp(P13, P17, px.x), ma45.x);\n" - " res1 = lerp(res1, lerp(P07, P13, px.y), ma45.y);\n" - " res1 = lerp(res1, lerp(P11, P07, px.z), ma45.z);\n" - " res1 = lerp(res1, lerp(P17, P11, px.w), ma45.w);\n" - "\n" - " float4 res2 = P12;\n" - " res2 = lerp(res2, lerp(P17, P11, px.w), ma45.w);\n" - " res2 = lerp(res2, lerp(P11, P07, px.z), ma45.z);\n" - " res2 = lerp(res2, lerp(P07, P13, px.y), ma45.y);\n" - " res2 = lerp(res2, lerp(P13, P17, px.x), ma45.x);\n" - "\n" - " float4 res = lerp(res1, res2, step(c_df(P12, res1), c_df(P12, res2)));\n" - " return postdivide_alpha(res);\n" - "}\n"; - -static const char* sampler_hybrid = sampler_xbrz; -static const char* sampler_hybrid_bicubic = sampler_xbrz; -static const char* sampler_bicubic = - "// generate the value of a Mitchell-Netravali scaling spline at distance d, with parameters A and B\n" - "// B=1 C=0 : cubic B spline (very smooth)\n" - "// B=C=1/3 : recommended for general upscaling\n" - "// B=0 C=1/2 : Catmull-Rom spline (sharp, ringing)\n" - "// see Mitchell & Netravali, \"Reconstruction Filters in Computer Graphics\"\n" - "\n" - "//#define BSPLINE\n" - "#ifdef BSPLINE\n" - " static const float B = 1.0f;\n" - " static const float C = 0.0f;\n" - "#else\n" - " static const float B = 1.0f / 3.0f;\n" - " static const float C = 1.0f / 3.0f;\n" - "#endif\n" - "float mitchell(float2 pos) {\n" - " float x = sqrt(dot(pos, pos));\n" - " return\n" - " step(x, 2.0) * \n" - " (step(1.0, x) * ((-B - 6 * C)*(x*x*x) + (6 * B + 30 * C)*(x*x) + (-12 * B - 48 * C)*x + (8 * B + 24 * C)) +\n" - " step(x, 1.0) * ((12 - 9 * B - 6 * C)*(x*x*x) + (-18 + 12 * B + 6 * C)*(x*x) + (6 - 2 * B)))\n" - " / 6.0f;\n" - "}\n" - "float4 tex_sample(float2 coord) {\n" - " float2 offset = frac(coord * u_texSize.xy) - 0.5;\n" - " float4 tempColor = 0.0;\n" - " float4 c;\n" - " float i,j;\n" - " float2 
pos;\n" - " for (i = -2.0; i < 3.0; i++){\n" - " pos.x = offset.x - i;\n" - " for (j = -2.0; j < 3.0 ;j++){\n" - " pos.y = offset.y - j;\n" - " c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));\n" - " tempColor+=c*mitchell(pos);\n" - " }\n" - " }\n" - " return postdivide_alpha(tempColor);\n" - "};\n"; - // Missing: Z depth range // Also, logic ops etc, of course, as they are not supported in DX9. -bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguage lang) { +bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, char* scalerCode, ShaderLanguage lang) { char *p = buffer; bool lmode = id.Bit(FS_BIT_LMODE); @@ -396,7 +56,6 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP); - bool needScaleFilter = g_Config.bRealtimeTexScaling && g_Config.iTexScalingLevel != 1 && !gstate_c.curTextureIsRT; ReplaceBlendType replaceBlend = static_cast(id.Bits(FS_BIT_REPLACE_BLEND, 3)); ReplaceAlphaType stencilToAlpha = static_cast(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)); @@ -495,7 +154,7 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag if (doTextureAlpha) { // TODO: check why the [0.0,1.0] clamp is necessary here WRITE(p, "float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); }"); - WRITE(p, "float4 postdivide_alpha(float4 c) { return c.a < 0.001f? 0.0f : float4(c.rgb / c.a, c.a); }\n"); + WRITE(p, "float4 postdivide_alpha(float4 c) { return c.a < 0.0001f? 
0.0f : float4(c.rgb / c.a, c.a); }\n");
 		} else {
 			WRITE(p, "#define premultiply_alpha(c) (c)\n");
 			WRITE(p, "#define postdivide_alpha(c) (c)\n");
@@ -509,35 +168,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 		}
 		WRITE(p, "};\n");
 
-		if (needScaleFilter) {
-			switch (g_Config.iTexScalingType) {
-			case TextureScalerCommon::XBRZ:
-				WRITE(p, sampler_xbrz);
-				break;
-			case TextureScalerCommon::HYBRID:
-				WRITE(p, sampler_hybrid);
-				break;
-			case TextureScalerCommon::BICUBIC:
-				WRITE(p, sampler_bicubic);
-				break;
-			case TextureScalerCommon::HYBRID_BICUBIC:
-				WRITE(p, sampler_hybrid_bicubic);
-				break;
-			case TextureScalerCommon::SABR:
-				WRITE(p, sampler_sabr);
-				break;
-			case TextureScalerCommon::GAUSSIAN:
-				WRITE(p, sampler_gaussian);
-				break;
-			case TextureScalerCommon::COSINE:
-				WRITE(p, sampler_cosine);
-				break;
-			default:
-				ERROR_LOG(G3D, "Unknown scaling type: %d", g_Config.iTexScalingType);
-				break;
-			}
+		if (scalerCode && !gstate_c.curTextureIsRT) {
+			WRITE(p, "%s", scalerCode);  // scalerCode is caller-supplied shader text, not a format: go through "%s" so a '%' in it is copied verbatim (assumes WRITE is printf-style; its definition is outside this hunk - confirm)
 		} else {
-			WRITE(p, sampler_default);
+			WRITE(p, "#define tex_sample(x) tex_sample_direct(x)\n");
 		}
 	}
 
diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.h b/GPU/Directx9/PixelShaderGeneratorDX9.h
index f845f5dcf8cc..4779d9f10317 100644
--- a/GPU/Directx9/PixelShaderGeneratorDX9.h
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.h
@@ -22,7 +22,7 @@
 
 namespace DX9 {
 
-bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguage lang = HLSL_DX9);
+bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, char *scalerCode = nullptr, ShaderLanguage lang = HLSL_DX9);
 
 #define CONST_PS_TEXENV 0
 #define CONST_PS_ALPHACOLORREF 1
diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp
index b0a99e93dc69..f7634dadf5c6 100644
--- a/GPU/Directx9/ShaderManagerDX9.cpp
+++ b/GPU/Directx9/ShaderManagerDX9.cpp
@@ -476,7 +476,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
 }
ShaderManagerDX9::ShaderManagerDX9(LPDIRECT3DDEVICE9 device) : device_(device), lastVShader_(nullptr), lastPShader_(nullptr) { - codeBuffer_ = new char[16384]; + codeBuffer_ = new char[16384 * 2]; } ShaderManagerDX9::~ShaderManagerDX9() { diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index bd7e37ff5eaf..5b297ff8a446 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -378,7 +378,7 @@ void GameSettingsScreen::CreateViews() { }); texScalingChoice->SetDisabledPtr(&g_Config.bSoftwareRendering); - static const char *texScaleAlgos[] = { "xBRZ", "Hybrid", "Bicubic", "Hybrid + Bicubic", "SABR", "Gaussian", "Cosine"}; + static const char *texScaleAlgos[] = { "xBRZ", "Hybrid", "Bicubic", "Hybrid + Bicubic", "XBR", "SABR", "Gaussian", "Cosine"}; PopupMultiChoice *texScalingType = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iTexScalingType, gr->T("Upscale Type"), texScaleAlgos, 0, ARRAY_SIZE(texScaleAlgos), gr->GetName(), screenManager())); texScalingType->SetDisabledPtr(&g_Config.bSoftwareRendering); diff --git a/Windows/MainWindowMenu.cpp b/Windows/MainWindowMenu.cpp index 8aba6ff3e26c..73a4ef571cae 100644 --- a/Windows/MainWindowMenu.cpp +++ b/Windows/MainWindowMenu.cpp @@ -787,6 +787,7 @@ namespace MainWindow { case ID_TEXTURESCALING_HYBRID: setTexScalingType(TextureScalerCommon::HYBRID); break; case ID_TEXTURESCALING_BICUBIC: setTexScalingType(TextureScalerCommon::BICUBIC); break; case ID_TEXTURESCALING_HYBRID_BICUBIC: setTexScalingType(TextureScalerCommon::HYBRID_BICUBIC); break; + case ID_TEXTURESCALING_XBR: setTexScalingType(TextureScalerCommon::XBR); break; case ID_TEXTURESCALING_SABR: setTexScalingType(TextureScalerCommon::SABR); break; case ID_TEXTURESCALING_GAUSSIAN: setTexScalingType(TextureScalerCommon::GAUSSIAN); break; case ID_TEXTURESCALING_COSINE: setTexScalingType(TextureScalerCommon::COSINE); break; @@ -1214,6 +1215,7 @@ namespace MainWindow { ID_TEXTURESCALING_HYBRID, ID_TEXTURESCALING_BICUBIC, 
ID_TEXTURESCALING_HYBRID_BICUBIC, + ID_TEXTURESCALING_XBR, ID_TEXTURESCALING_SABR, ID_TEXTURESCALING_GAUSSIAN, ID_TEXTURESCALING_COSINE, diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 53bbe7e184d2..47a74e494e17 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -613,6 +613,7 @@ BEGIN MENUITEM "Hybrid", ID_TEXTURESCALING_HYBRID MENUITEM "Bicubic", ID_TEXTURESCALING_BICUBIC MENUITEM "Hybrid + Bicubic", ID_TEXTURESCALING_HYBRID_BICUBIC + MENUITEM "XBR", ID_TEXTURESCALING_XBR MENUITEM "SABR", ID_TEXTURESCALING_SABR MENUITEM "Gaussian", ID_TEXTURESCALING_GAUSSIAN MENUITEM "Cosine", ID_TEXTURESCALING_COSINE diff --git a/Windows/resource.h b/Windows/resource.h index bdf1308100de..9a8c9f763d8e 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -342,9 +342,10 @@ #define ID_TEXTURESCALING_REALTIME 40177 #define ID_TEXTURESCALING_REALTIME_HC 40178 -#define ID_TEXTURESCALING_SABR 40179 -#define ID_TEXTURESCALING_GAUSSIAN 40180 -#define ID_TEXTURESCALING_COSINE 40181 +#define ID_TEXTURESCALING_XBR 40179 +#define ID_TEXTURESCALING_SABR 40180 +#define ID_TEXTURESCALING_GAUSSIAN 40181 +#define ID_TEXTURESCALING_COSINE 40182 // Dummy option to let the buffered rendering hotkey cycle through all the options. 
#define ID_OPTIONS_BUFFEREDRENDERINGDUMMY 40500
diff --git a/assets/scalers/bicubic.hlsl b/assets/scalers/bicubic.hlsl
new file mode 100644
index 000000000000..8a20886e9d04
--- /dev/null
+++ b/assets/scalers/bicubic.hlsl
@@ -0,0 +1,49 @@
+// Bicubic texture upscaler: defines tex_sample() on top of tex_sample_direct() using the spline below.
+// generate the value of a Mitchell-Netravali scaling spline at distance d, with parameters B and C
+// B=1 C=0 : cubic B spline (very smooth)
+// B=C=1/3 : recommended for general upscaling
+// B=0 C=1/2 : Catmull-Rom spline (sharp, ringing)
+// see Mitchell & Netravali, "Reconstruction Filters in Computer Graphics"
+
+//#define BSPLINE
+#ifdef BSPLINE
+   static const float B = 1.0f;
+   static const float C = 0.0f;
+#else
+   static const float B = 1.0f / 3.0f;
+   static const float C = 1.0f / 3.0f;
+#endif
+// Spline segment that mitchell() uses for x < 1.
+float mitchell_0_1(float x) {
+   return ((12 - 9 * B - 6 * C)*(x*x*x) + (-18 + 12 * B + 6 * C)*(x*x) + (6 - 2 * B)) / 6.0f;
+}
+// Spline segment that mitchell() uses for 1 <= x < 2.
+float mitchell_1_2(float x) {
+   return ((-B - 6 * C)*(x*x*x) + (6 * B + 30 * C)*(x*x) + (-12 * B - 48 * C)*x + (8 * B + 24 * C)) / 6.0f;
+}
+// Kernel weight for a 2D offset: the spline is evaluated on the Euclidean length of pos and is 0.0 from 2.0 onwards.
+float mitchell(float2 pos) {
+   float x = sqrt(dot(pos, pos));
+// Branchless alternative: return lerp(mitchell_0_1(x), mitchell_1_2(x), step(1.0,x)) * step(x, 2.0);
+   if (x < 1.0)
+      return mitchell_0_1(x);
+   if (x < 2.0)
+      return mitchell_1_2(x);
+   return 0.0;
+}
+float4 tex_sample(float2 coord) {   // weighted sum over a 5x5 block of premultiplied taps
+   float2 offset = frac(coord * u_texSize.xy) - 0.5;   // position inside the texel, in [-0.5, 0.5)
+   float4 tempColor = 0.0;
+   float4 c;
+   float i,j;
+   float2 pos;
+   for (i = -2.0; i < 3.0; i++){
+      pos.x = offset.x - i;
+      for (j = -2.0; j < 3.0 ;j++){
+         pos.y = offset.y - j;
+         c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));   // presumably u_texSize.zw is the size of one texel - confirm
+         tempColor+=c*mitchell(pos);
+      }
+   }
+   return postdivide_alpha(tempColor);   // undo the premultiplication applied to every tap
+}
diff --git a/assets/scalers/cosine.hlsl b/assets/scalers/cosine.hlsl
new file mode 100644
index 000000000000..2481529f940a
--- /dev/null
+++ b/assets/scalers/cosine.hlsl
@@ -0,0 +1,24 @@
+// Cosine-kernel texture upscaler. KERNEL(x,b) equals the integral of (1 + cos(u)) / (2*pi) for u from e(x,b) to d(x,b).
+#define sharpness 1.0
+#define pi 3.14159265358
+#define a(x) abs(x)   // NOTE(review): these short macro names stay defined for whatever shader text follows this file - confirm nothing there uses them
+#define d(x,b) (pi*b*min(a(x)+0.5,1.0/b))
+#define e(x,b) (pi*b*min(max(a(x)-0.5,-1.0/b),1.0/b))
+#define KERNEL(x,b) ((d(x,b)+sin(d(x,b))-e(x,b)-sin(e(x,b)))/(2.0*pi))
+// Weighted sum over a 4x4 block of premultiplied taps; the weight is separable: KERNEL(x) * KERNEL(y).
+float4 tex_sample(float2 coord) {
+   float2 offset = frac(coord * u_texSize.xy) - 0.5;   // position inside the texel, in [-0.5, 0.5)
+   float4 tempColor = 0.0;
+   float4 c;
+   float i,j;
+   float2 pos;
+   for (i = -2.0; i < 2.0; i++){
+      pos.x = offset.x - i;
+      for (j = -2.0; j< 2.0 ;j++){
+         pos.y = offset.y - j;
+         c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));
+         tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);
+      }
+   }
+   return postdivide_alpha(tempColor);   // undo the premultiplication applied to every tap
+}
diff --git a/assets/scalers/gaussian.hlsl b/assets/scalers/gaussian.hlsl
new file mode 100644
index 000000000000..f57cd4ddcf58
--- /dev/null
+++ b/assets/scalers/gaussian.hlsl
@@ -0,0 +1,33 @@
+// Gaussian-kernel texture upscaler. KERNEL(x,b) is the difference of normalGaussIntegral() at the two ends of a unit-wide interval around x.
+#define sharpness 1.0
+#define pi 3.14159265358
+#define normalGauss(x) ((exp(-(x)*(x)*0.5))/sqrt(2.0*pi))
+#define normalGauss2(x) (normalGauss(x - 0.5) - 0.5)   // not referenced anywhere in this file
+float normalGaussIntegral(float x)   // odd function of x: the result carries sign(x)
+{
+   float a1 = 0.4361836;
+   float a2 = -0.1201676;
+   float a3 = 0.9372980;
+   float p = 0.3326700;
+   float t = 1.0 / (1.0 + p*abs(x));
+   // 0.5 minus (density * cubic polynomial in t); presumably the classic polynomial approximation of the normal CDF, shifted by 0.5 - confirm
+   return (0.5-normalGauss(x) * (t*(a1 + t*(a2 + a3*t))))*sign(x);
+}
+#define KERNEL(x,b) (normalGaussIntegral(sqrt(2*pi)*b*(x - 0.5)) - normalGaussIntegral(sqrt(2*pi)*b*(x + 0.5)))
+// Weighted sum over a 4x4 block of premultiplied taps; the weight is separable: KERNEL(x) * KERNEL(y).
+float4 tex_sample(float2 coord) {
+   float2 offset = frac(coord * u_texSize.xy) - 0.5;   // position inside the texel, in [-0.5, 0.5)
+   float4 tempColor = 0.0;
+   float4 c;
+   float i,j;
+   float2 pos;
+   for (i = -2.0; i < 2.0; i++){
+      pos.x = offset.x - i;
+      for (j = -2.0; j< 2.0 ;j++){
+         pos.y = offset.y - j;
+         c=premultiply_alpha(tex_sample_direct(coord - pos * u_texSize.zw));
+         tempColor+=c*KERNEL(pos.x,sharpness)*KERNEL(pos.y,sharpness);
+      }
+   }
+   return postdivide_alpha(tempColor);   // undo the premultiplication applied to every tap
+}
diff --git a/assets/scalers/hybrid.hlsl b/assets/scalers/hybrid.hlsl
new file mode 100644
index 000000000000..f03a50302933
--- /dev/null
+++ b/assets/scalers/hybrid.hlsl
@@ -0,0 +1,4 @@
+// "Hybrid" scaler: plain pass-through to tex_sample_direct(). NOTE(review): the removed sampler_hybrid aliased sampler_xbrz - confirm the pass-through is intended.
+float4 tex_sample(float2 coord) {
+   return tex_sample_direct(coord);
+}
diff --git
a/assets/scalers/hybrid_bicubic.hlsl b/assets/scalers/hybrid_bicubic.hlsl
new file mode 100644
index 000000000000..f03a50302933
--- /dev/null
+++ b/assets/scalers/hybrid_bicubic.hlsl
@@ -0,0 +1,4 @@
+// "Hybrid + Bicubic" scaler: plain pass-through to tex_sample_direct(). NOTE(review): the removed sampler_hybrid_bicubic aliased sampler_xbrz - confirm the pass-through is intended.
+float4 tex_sample(float2 coord) {
+   return tex_sample_direct(coord);
+}
diff --git a/assets/scalers/sabr.hlsl b/assets/scalers/sabr.hlsl
new file mode 100644
index 000000000000..05d0222e694e
--- /dev/null
+++ b/assets/scalers/sabr.hlsl
@@ -0,0 +1,64 @@
+// SABR texture upscaler: blends the centre texel with its four direct neighbours along 45-degree edges.
+float c_df(float4 c1, float4 c2) {   // sum of absolute R, G and B differences; alpha is ignored
+   float3 df = abs(c1.rgb - c2.rgb);
+   return df.r + df.g + df.b;
+}
+static const float4 Ai = float4( 1.0, -1.0, -1.0, 1.0);
+static const float4 B45 = float4( 1.0, 1.0, -1.0, -1.0);
+static const float4 C45 = float4( 1.5, 0.5, -0.5, 0.5);
+static const float4 M45 = float4(0.4, 0.4, 0.4, 0.4);
+static const float4 M30 = float4(0.2, 0.4, 0.2, 0.4);   // not referenced anywhere in this file
+static const float3 lum = float3(0.21, 0.72, 0.07);
+// Luminance of a colour: dot product of its RGB with the lum weights (one- and four-colour overloads).
+float lum_to(float4 v) {
+   return dot(lum, v.rgb);
+}
+float4 lum_to(float4 v0, float4 v1, float4 v2, float4 v3) {
+   return float4(lum_to(v0), lum_to(v1), lum_to(v2), lum_to(v3));
+}
+
+// Samples the centre and its 4 neighbours (premultiplied alpha), blends per corner, then undoes the premultiplication.
+float4 tex_sample(float2 coord)
+{
+/*
+   Mask for algorithm
+   +-----+-----+-----+
+   |     |  7  |     |
+   +-----+-----+-----+
+   | 11  | 12  | 13  |
+   +-----+-----+-----+
+   |     | 17  |     |
+   +-----+-----+-----+
+*/
+// Store mask values
+   float4 P07 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, -1.0)));
+   float4 P11 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2(-1.0, 0.0)));
+   float4 P12 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 0.0)));
+   float4 P13 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 1.0, 0.0)));
+   float4 P17 = premultiply_alpha(tex_sample_direct(coord + u_texSize.zw * float2( 0.0, 1.0)));
+
+   // Store luminance values of each point
+   float4 p7 = lum_to(P07, P11, P17, P13);
+   float4 p12 = lum_to(P12);   // scalar luminance of the centre, replicated to all four lanes
+   float4 p13 = p7.wxyz; // P13, P7, P11, P17
+   float4 p17 = p7.zwxy; // P17, P13, P7, P11
+
+   float2 fp = frac(coord * u_texSize.xy);   // position inside the texel, in [0, 1)
+   float4 ma45 = smoothstep(C45 - M45, C45 + M45, Ai * fp.y + B45 * fp.x);   // per-corner blend amount
+   float4 px = step(abs(p12 - p17), abs(p12 - p13));   // per lane: 1.0 when the p17 neighbour is at least as close in luminance as the p13 one
+
+   float4 res1 = P12;   // corners applied in x, y, z, w order
+   res1 = lerp(res1, lerp(P13, P17, px.x), ma45.x);
+   res1 = lerp(res1, lerp(P07, P13, px.y), ma45.y);
+   res1 = lerp(res1, lerp(P11, P07, px.z), ma45.z);
+   res1 = lerp(res1, lerp(P17, P11, px.w), ma45.w);
+
+   float4 res2 = P12;   // same blends applied in the reverse order
+   res2 = lerp(res2, lerp(P17, P11, px.w), ma45.w);
+   res2 = lerp(res2, lerp(P11, P07, px.z), ma45.z);
+   res2 = lerp(res2, lerp(P07, P13, px.y), ma45.y);
+   res2 = lerp(res2, lerp(P13, P17, px.x), ma45.x);
+   // step() yields 1.0 (pick res2) when res2 differs from the centre at least as much as res1 does
+   float4 res = lerp(res1, res2, step(c_df(P12, res1), c_df(P12, res2)));
+   return postdivide_alpha(res);
+}
diff --git a/assets/scalers/xbr.hlsl b/assets/scalers/xbr.hlsl
new file mode 100644
index 000000000000..546831f36c1a
--- /dev/null
+++ b/assets/scalers/xbr.hlsl
@@ -0,0 +1,210 @@
+/*
+   Hyllian's xBR-lv2 Shader
+
+   Copyright (C) 2011-2015 Hyllian - sergiogdb@gmail.com
+
+   Permission is hereby granted, free of charge, to any person obtaining a
+   copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to permit
+   persons to whom the Software is furnished to do so, subject to the
+   following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   Incorporates some of the ideas from SABR shader. Thanks to Joshua Street.
+*/
+
+#define XBR_Y_WEIGHT 48.0 // 0.0 .. 100.0
+#define XBR_EQ_THRESHOLD 15.0 // 0.0 .. 50.0
+#define XBR_LV1_COEFFICIENT 0.5 // 0.0 .. 30.0 (not referenced anywhere in this file)
+#define XBR_LV2_COEFFICIENT 2.0 // 1.0 .. 3.0
+//#define SMALL_DETAILS
+
+#define CORNER_TYPE 3 // 1 .. 4
+#define XBR_SCALE 4.0
+
+static const float coef = 2.0; // not referenced anywhere in this file
+static const float3 rgbw = float3(14.352, 28.176, 5.472); // RGB weights used to reduce a colour to one scalar
+static const float4 eq_threshold = float4(15.0, 15.0, 15.0, 15.0); // not referenced; eq() uses XBR_EQ_THRESHOLD
+
+static const float4 delta = float4(1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE);
+static const float4 delta_l = float4(0.5/XBR_SCALE, 1.0/XBR_SCALE, 0.5/XBR_SCALE, 1.0/XBR_SCALE);
+static const float4 delta_u = delta_l.yxwz;
+
+static const float4 Ao = float4( 1.0, -1.0, -1.0, 1.0 );
+static const float4 Bo = float4( 1.0, 1.0, -1.0,-1.0 );
+static const float4 Co = float4( 1.5, 0.5, -0.5, 0.5 );
+static const float4 Ax = float4( 1.0, -1.0, -1.0, 1.0 );
+static const float4 Bx = float4( 0.5, 2.0, -0.5,-2.0 );
+static const float4 Cx = float4( 1.0, 1.0, -0.5, 0.0 );
+static const float4 Ay = float4( 1.0, -1.0, -1.0, 1.0 );
+static const float4 By = float4( 2.0, 0.5, -2.0,-0.5 );
+static const float4 Cy = float4( 2.0, 0.0, -1.0, 0.5 );
+static const float4 Ci = float4(0.25, 0.25, 0.25, 0.25);
+
+static const float3 Y = float3(0.2126, 0.7152, 0.0722); // only used when SMALL_DETAILS is defined
+
+// Per-component absolute difference.
+float4 df(float4 A, float4 B) { return abs(A-B); }
+
+// Per component: 1.0 where A and B differ by at least 0.001, otherwise 0.0.
+float4 diff(float4 A, float4 B) { return step(0.001, df(A, B)); }
+
+// Per component: 1.0 where A and B differ by at most XBR_EQ_THRESHOLD, otherwise 0.0.
+float4 eq(float4 A, float4 B) { return (step(df(A, B), XBR_EQ_THRESHOLD)); }
+
+// Per-component complement of eq(): 1.0 where the difference exceeds XBR_EQ_THRESHOLD.
+float4 neq(float4 A, float4 B) { return (1.0 - eq(A, B)); }
+
+// Weighted distance: four pair differences plus 4x the (g,h) difference.
+float4 wd(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h) {
+   return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));
+}
+// Variant with two extra pairs and a 2x weight on (g,h); only called when SMALL_DETAILS is defined.
+float4 weighted_distance(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h, float4 i, float4 j, float4 k, float4 l) {
+   return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h));
+}
+// Sum of the absolute R, G and B differences of two colours.
+float c_df(float3 c1, float3 c2) {
+   float3 df = abs(c1 - c2);
+   return df.r + df.g + df.b;
+}
+// Reads 21 premultiplied taps around coord, evaluates the edge rules on their rgbw-weighted values and blends the centre texel E with its neighbours.
+float4 tex_sample(float2 coord) {
+   float dx = u_texSize.z; // presumably the width of one texel - confirm
+   float dy = u_texSize.w; // presumably the height of one texel - confirm
+
+   float4 t1 = coord.xxxy + float4( -dx, 0, dx,-2.0*dy); // A1 B1 C1
+   float4 t2 = coord.xxxy + float4( -dx, 0, dx, -dy); // A B C
+   float4 t3 = coord.xxxy + float4( -dx, 0, dx, 0); // D E F
+   float4 t4 = coord.xxxy + float4( -dx, 0, dx, dy); // G H I
+   float4 t5 = coord.xxxy + float4( -dx, 0, dx, 2.0*dy); // G5 H5 I5
+   float4 t6 = coord.xyyy + float4(-2.0*dx,-dy, 0, dy); // A0 D0 G0
+   float4 t7 = coord.xyyy + float4( 2.0*dx,-dy, 0, dy); // C4 F4 I4
+   float4 edri, edr, edr_l, edr_u, px; // px = pixel, edr = edge detection rule
+   float4 irlv0, irlv1, irlv2l, irlv2u, block_3d; // block_3d is declared but never used below
+   float4 fx, fx_l, fx_u; // inequations of straight lines.
+
+   float2 fp = frac(coord*u_texSize.xy); // position inside the texel, in [0, 1)
+
+   float4 A1 = premultiply_alpha(tex_sample_direct(t1.xw));
+   float4 B1 = premultiply_alpha(tex_sample_direct(t1.yw));
+   float4 C1 = premultiply_alpha(tex_sample_direct(t1.zw));
+   float4 A = premultiply_alpha(tex_sample_direct(t2.xw));
+   float4 B = premultiply_alpha(tex_sample_direct(t2.yw));
+   float4 C = premultiply_alpha(tex_sample_direct(t2.zw));
+   float4 D = premultiply_alpha(tex_sample_direct(t3.xw));
+   float4 E = premultiply_alpha(tex_sample_direct(t3.yw));
+   float4 F = premultiply_alpha(tex_sample_direct(t3.zw));
+   float4 G = premultiply_alpha(tex_sample_direct(t4.xw));
+   float4 H = premultiply_alpha(tex_sample_direct(t4.yw));
+   float4 I = premultiply_alpha(tex_sample_direct(t4.zw));
+   float4 G5 = premultiply_alpha(tex_sample_direct(t5.xw));
+   float4 H5 = premultiply_alpha(tex_sample_direct(t5.yw));
+   float4 I5 = premultiply_alpha(tex_sample_direct(t5.zw));
+   float4 A0 = premultiply_alpha(tex_sample_direct(t6.xy));
+   float4 D0 = premultiply_alpha(tex_sample_direct(t6.xz));
+   float4 G0 = premultiply_alpha(tex_sample_direct(t6.xw));
+   float4 C4 = premultiply_alpha(tex_sample_direct(t7.xy));
+   float4 F4 = premultiply_alpha(tex_sample_direct(t7.xz));
+   float4 I4 = premultiply_alpha(tex_sample_direct(t7.xw));
+
+   float4 b = float4(dot(B.rgb ,rgbw), dot(D.rgb ,rgbw), dot(H.rgb ,rgbw), dot(F.rgb ,rgbw)); // weighted values of the 4 edge neighbours
+   float4 c = float4(dot(C.rgb ,rgbw), dot(A.rgb ,rgbw), dot(G.rgb ,rgbw), dot(I.rgb ,rgbw)); // weighted values of the 4 corner neighbours
+   float4 d = b.yzwx; // d..i are swizzles of b and c; presumably one lane per rotated orientation - confirm
+   float4 e = dot(E.rgb,rgbw); // centre value, replicated to all four lanes
+   float4 f = b.wxyz;
+   float4 g = c.zwxy;
+   float4 h = b.zwxy;
+   float4 i = c.wxyz;
+
+   float4 i4, i5, h5, f4;
+
+   float y_weight = XBR_Y_WEIGHT; // only read in the SMALL_DETAILS branch
+#ifdef SMALL_DETAILS
+   i4 = mul(float4x3(I4.rgb, C1.rgb, A0.rgb, G5.rgb), y_weight * Y);
+   i5 = mul(float4x3(I5.rgb, C4.rgb, A1.rgb, G0.rgb), y_weight * Y);
+   h5 = mul(float4x3(H5.rgb, F4.rgb, B1.rgb, D0.rgb), y_weight * Y);
+#else
+   i4 = float4(dot(I4.rgb,rgbw), dot(C1.rgb,rgbw), dot(A0.rgb,rgbw), dot(G5.rgb,rgbw));
+   i5 = float4(dot(I5.rgb,rgbw), dot(C4.rgb,rgbw), dot(A1.rgb,rgbw), dot(G0.rgb,rgbw));
+   h5 = float4(dot(H5.rgb,rgbw), dot(F4.rgb,rgbw), dot(B1.rgb,rgbw), dot(D0.rgb,rgbw));
+#endif
+   f4 = h5.yzwx;
+
+   // These inequations define the line below which interpolation occurs.
+   fx = (Ao*fp.y+Bo*fp.x);
+   fx_l = (Ax*fp.y+Bx*fp.x);
+   fx_u = (Ay*fp.y+By*fp.x);
+
+   irlv1 = irlv0 = diff(e,f) * diff(e,h); // non-zero only where the centre differs from both f and h
+
+#if CORNER_TYPE == 1
+#define SMOOTH_TIPS
+#elif CORNER_TYPE == 2
+   irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );
+#elif CORNER_TYPE == 3
+   irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );
+#else // CORNER_TYPE == 4
+   float4 c1 = i4.yzwx;
+   float4 g0 = i5.wxyz;
+   irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));
+#endif
+
+   irlv2l = diff(e,g) * diff(d,g);
+   irlv2u = diff(e,c) * diff(b,c);
+
+   float4 fx45i = clamp((fx + delta -Co - Ci)/(2.0*delta ), 0.0, 1.0);
+   float4 fx45 = clamp((fx + delta -Co )/(2.0*delta ), 0.0, 1.0);
+   float4 fx30 = clamp((fx_l + delta_l -Cx )/(2.0*delta_l), 0.0, 1.0);
+   float4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0);
+
+   float4 wd1, wd2;
+#ifdef SMALL_DETAILS
+   wd1 = weighted_distance( e, c, g, i, f4, h5, h, f, b, d, i4, i5);
+   wd2 = weighted_distance( h, d, i5, f, b, i4, e, i, g, h5, c, f4);
+#else
+   wd1 = wd( e, c, g, i, h5, f4, h, f);
+   wd2 = wd( h, d, i5, f, i4, b, e, i);
+#endif
+
+   edri = step(wd1, wd2) * irlv0;
+   edr = step(wd1 + float4(0.1, 0.1, 0.1, 0.1), wd2) * step(float4(0.5, 0.5, 0.5, 0.5), irlv1);
+   edr_l = step( XBR_LV2_COEFFICIENT*df(f,g), df(h,c) ) * irlv2l * edr;
+   edr_u = step( XBR_LV2_COEFFICIENT*df(h,c), df(f,g) ) * irlv2u * edr;
+
+   fx45 = edr * fx45;
+   fx30 = edr_l * fx30;
+   fx60 = edr_u * fx60;
+   fx45i = edri * fx45i; // only feeds maximos when SMOOTH_TIPS is defined
+
+   px = step(df(e,f), df(e,h)); // per lane: 1.0 when f is at least as close to the centre as h
+
+#ifdef SMOOTH_TIPS
+   float4 maximos = max(max(fx30, fx60), max(fx45, fx45i));
+#else
+   float4 maximos = max(max(fx30, fx60), fx45);
+#endif
+
+   float4 res1 = E; // blend using lanes x and z
+   res1 = lerp(res1, lerp(H, F, px.x), maximos.x);
+   res1 = lerp(res1, lerp(B, D, px.z), maximos.z);
+
+   float4 res2 = E; // blend using lanes y and w
+   res2 = lerp(res2, lerp(F, B, px.y), maximos.y);
+   res2 = lerp(res2, lerp(D, H, px.w), maximos.w);
+   // step() yields 1.0 (pick res2) when res2 differs from E at least as much as res1 does
+   float4 res = lerp(res1, res2, step(c_df(E.rgb, res1.rgb), c_df(E.rgb, res2.rgb)));
+   return postdivide_alpha(res); // undo the premultiplication applied to every tap
+}
diff --git a/assets/scalers/xbrz.hlsl b/assets/scalers/xbrz.hlsl
new file mode 100644
index 000000000000..43745eb453ca
--- /dev/null
+++ b/assets/scalers/xbrz.hlsl
@@ -0,0 +1,335 @@
+
+// 4xBRZ shader - Copyright (C) 2014-2016 DeSmuME team
+//
+// This file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// This file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with the this software. If not, see <http://www.gnu.org/licenses/>.
+ + +/* + Hyllian's xBR-vertex code and texel mapping + + Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +#define BLEND_NONE 0 +#define BLEND_NORMAL 1 +#define BLEND_DOMINANT 2 +#define LUMINANCE_WEIGHT 1.0 +#define EQUAL_COLOR_TOLERANCE 30.0/255.0 +#define STEEP_DIRECTION_THRESHOLD 2.2 +#define DOMINANT_DIRECTION_THRESHOLD 3.6 + +float reduce(float4 color) +{ + return dot(color.rgb, float3(65536.0, 256.0, 1.0)); +} + +float DistYCbCr(float4 pixA, float4 pixB) +{ + const float3 w = float3(0.2627, 0.6780, 0.0593); + const float scaleB = 0.5 / (1.0 - w.b); + const float scaleR = 0.5 / (1.0 - w.r); + float3 diff = pixA.rgb - pixB.rgb; + float Y = dot(diff, w); + float Cb = scaleB * (diff.b - Y); + float Cr = scaleR * (diff.r - Y); + + return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) ); +} + +bool IsPixEqual(const float4 pixA, const float4 pixB) +{ + return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); +} + +bool IsBlendingNeeded(const int4 blend) +{ + return any(blend - BLEND_NONE); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 20|21|22|23|24|25 +// 19|06|07|08|09|26 +// 18|05|00|01|10|27 +// 17|04|03|02|11|28 +// 16|15|14|13|12|29 +// 35|34|33|32|31|30 + +float4 tex_sample(float2 coord) +{ + float dx = u_texSize.z; + float dy = u_texSize.w; + + // A1 B1 C1 + // A0 A B C C4 + // D0 D E F F4 + // G0 G H I I4 + // G5 H5 I5 + + float4 t1 = coord.xxxy + float4( -dx, 0.0, dx,-2.0*dy); // A1 B1 C1 + float4 t2 = coord.xxxy + float4( -dx, 0.0, dx, -dy); // A B C + float4 t3 = coord.xxxy + float4( -dx, 0.0, dx, 0.0); // D E F + float4 t4 = coord.xxxy + float4( -dx, 0.0, dx, dy); // G H I + float4 t5 = coord.xxxy + float4( -dx, 0.0, dx, 2.0*dy); // G5 H5 I5 + float4 t6 = coord.xyyy + float4(-2.0*dx,-dy, 0.0, dy); // A0 D0 G0 + float4 t7 = coord.xyyy + float4( 2.0*dx,-dy, 0.0, dy); // C4 F4 I4 + + float2 f = frac(coord.xy * u_texSize.xy); + + //--------------------------------------- + // 
Input Pixel Mapping: |21|22|23| + // 19|06|07|08|09 + // 18|05|00|01|10 + // 17|04|03|02|11 + // |15|14|13| + + float4 src[25]; + + src[21] = premultiply_alpha(tex_sample_direct(t1.xw)); + src[22] = premultiply_alpha(tex_sample_direct(t1.yw)); + src[23] = premultiply_alpha(tex_sample_direct(t1.zw)); + src[ 6] = premultiply_alpha(tex_sample_direct(t2.xw)); + src[ 7] = premultiply_alpha(tex_sample_direct(t2.yw)); + src[ 8] = premultiply_alpha(tex_sample_direct(t2.zw)); + src[ 5] = premultiply_alpha(tex_sample_direct(t3.xw)); + src[ 0] = premultiply_alpha(tex_sample_direct(t3.yw)); + src[ 1] = premultiply_alpha(tex_sample_direct(t3.zw)); + src[ 4] = premultiply_alpha(tex_sample_direct(t4.xw)); + src[ 3] = premultiply_alpha(tex_sample_direct(t4.yw)); + src[ 2] = premultiply_alpha(tex_sample_direct(t4.zw)); + src[15] = premultiply_alpha(tex_sample_direct(t5.xw)); + src[14] = premultiply_alpha(tex_sample_direct(t5.yw)); + src[13] = premultiply_alpha(tex_sample_direct(t5.zw)); + src[19] = premultiply_alpha(tex_sample_direct(t6.xy)); + src[18] = premultiply_alpha(tex_sample_direct(t6.xz)); + src[17] = premultiply_alpha(tex_sample_direct(t6.xw)); + src[ 9] = premultiply_alpha(tex_sample_direct(t7.xy)); + src[10] = premultiply_alpha(tex_sample_direct(t7.xz)); + src[11] = premultiply_alpha(tex_sample_direct(t7.xw)); + + float v[9]; + v[0] = reduce(src[0]); + v[1] = reduce(src[1]); + v[2] = reduce(src[2]); + v[3] = reduce(src[3]); + v[4] = reduce(src[4]); + v[5] = reduce(src[5]); + v[6] = reduce(src[6]); + v[7] = reduce(src[7]); + v[8] = reduce(src[8]); + + int4 blendResult = BLEND_NONE; + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + // Corner (1, 1) + if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) + { + float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + 
(4.0 * DistYCbCr(src[ 3], src[ 1])); + float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) + { + float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) + { + float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) + { + float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float4 dst[16]; + dst[ 0] = src[0]; + dst[ 1] = src[0]; + dst[ 2] = src[0]; + dst[ 3] = src[0]; + dst[ 4] = src[0]; + dst[ 5] = src[0]; + dst[ 6] = src[0]; + dst[ 7] = src[0]; + dst[ 8] = src[0]; + dst[ 9] = src[0]; + dst[10] = src[0]; + dst[11] = src[0]; + dst[12] = src[0]; + dst[13] = src[0]; + dst[14] = src[0]; + dst[15] = src[0]; + + // Scale pixel + if (IsBlendingNeeded(blendResult) == true) + { + float dist_01_04 = DistYCbCr(src[1], src[4]); + float dist_03_08 = DistYCbCr(src[3], src[8]); + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); + bool needBlend = (blendResult[2] != BLEND_NONE); + bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || + ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) == false) ) == false ); + + float4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + dst[ 2] = lerp(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[10] = lerp(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[11] = lerp(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[13] = lerp(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 
1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[14] = lerp(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[7], src[2]); + dist_03_08 = DistYCbCr(src[1], src[6]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); + needBlend = (blendResult[1] != BLEND_NONE); + doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + + blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + dst[ 1] = lerp(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 7] = lerp(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 8] = lerp(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[10] = lerp(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[11] = lerp(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[5], src[8]); + dist_03_08 = DistYCbCr(src[7], src[4]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); + needBlend = (blendResult[0] != BLEND_NONE); + doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + + blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; + dst[ 0] = lerp(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 4] = lerp(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 5] = lerp(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 7] = lerp(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 8] = lerp(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + + + dist_01_04 = DistYCbCr(src[3], src[6]); + dist_03_08 = DistYCbCr(src[5], src[2]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); + needBlend = (blendResult[3] != BLEND_NONE); + doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + + blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + dst[ 3] = lerp(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[13] = lerp(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[14] = lerp(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 4] = lerp(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 5] = lerp(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + } + + // select output pixel + float4 res = lerp(lerp(lerp(lerp(dst[ 6], dst[ 7], step(0.25, f.x)), + lerp(dst[ 8], dst[ 9], step(0.75, f.x)), + step(0.50, f.x)), + lerp(lerp(dst[ 5], dst[ 0], step(0.25, f.x)), + lerp(dst[ 1], dst[10], step(0.75, f.x)), + step(0.50, f.x)), + step(0.25, f.y)), + lerp(lerp(lerp(dst[ 4], dst[ 3], step(0.25, f.x)), + lerp(dst[ 2], dst[11], step(0.75, f.x)), + step(0.50, f.x)), + lerp(lerp(dst[15], dst[14], step(0.25, f.x)), + lerp(dst[13], dst[12], step(0.75, f.x)), + step(0.50, f.x)), + step(0.75, f.y)), + step(0.50, f.y)); + + return postdivide_alpha(res); +}; diff --git a/libretro/libretro.cpp b/libretro/libretro.cpp index 7a5bb611b64f..5332f805da64 100644 --- a/libretro/libretro.cpp +++ b/libretro/libretro.cpp @@ -175,7 +175,7 @@ static RetroOption ppsspp_button_preference("ppsspp_button_preference", "Co static RetroOption ppsspp_fast_memory("ppsspp_fast_memory", "Fast Memory (Speedhack)", true); static RetroOption ppsspp_block_transfer_gpu("ppsspp_block_transfer_gpu", "Block Transfer GPU", true); static RetroOption ppsspp_texture_scaling_level("ppsspp_texture_scaling_level", "Texture Scaling Level", { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 }, { "5", 5 }, { "0", 0 } }); -static RetroOption ppsspp_texture_scaling_type("ppsspp_texture_scaling_type", "Texture Scaling Type", { { "xbrz", TextureScalerCommon::XBRZ }, { "hybrid", TextureScalerCommon::HYBRID }, { "bicubic", TextureScalerCommon::BICUBIC }, { "hybrid_bicubic", TextureScalerCommon::HYBRID_BICUBIC }, { "SABR", TextureScalerCommon::SABR }, { "gaussian", TextureScalerCommon::GAUSSIAN }, { "cosine", TextureScalerCommon::COSINE } }); +static RetroOption ppsspp_texture_scaling_type("ppsspp_texture_scaling_type", "Texture Scaling Type", { { "xbrz", TextureScalerCommon::XBRZ }, { "hybrid", TextureScalerCommon::HYBRID }, { "bicubic", TextureScalerCommon::BICUBIC }, { "hybrid_bicubic", TextureScalerCommon::HYBRID_BICUBIC }, { "XBR", TextureScalerCommon::XBR }, { 
"SABR", TextureScalerCommon::SABR }, { "gaussian", TextureScalerCommon::GAUSSIAN }, { "cosine", TextureScalerCommon::COSINE } }); static RetroOption ppsspp_texture_scaling_realtime("ppsspp_texture_scaling_realtime", "Realtime Texture Scaling", false); static RetroOption ppsspp_texture_anisotropic_filtering("ppsspp_texture_anisotropic_filtering", "Anisotropic Filtering", { "off", "1x", "2x", "4x", "8x", "16x" }); static RetroOption ppsspp_texture_deposterize("ppsspp_texture_deposterize", "Texture Deposterize", false); From e74e7ad990cc2a0e519a884c931340a6a6ecccd0 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Wed, 4 Apr 2018 07:48:31 +0100 Subject: [PATCH 3/3] D3D: gpu scalers: - rework xbrz: improve performance and make it scale independant, move the old xbrz code to 4xbrz.hlsl. - better handling of alpha in xbr/xbrz shaders, fixes text scaling with some games. --- GPU/Common/TextureScalerCommon.cpp | 1 + GPU/Common/TextureScalerCommon.h | 2 +- GPU/D3D11/ShaderManagerD3D11.cpp | 1 + GPU/Directx9/PixelShaderGeneratorDX9.cpp | 7 +- UI/GameSettingsScreen.cpp | 2 +- Windows/MainWindowMenu.cpp | 4 + Windows/ppsspp.rc | 3 +- Windows/resource.h | 9 +- assets/scalers/4xbrz.hlsl | 343 +++++++++++++++++ assets/scalers/bicubic.hlsl | 8 + assets/scalers/cosine.hlsl | 7 + assets/scalers/gaussian.hlsl | 7 + assets/scalers/sabr.hlsl | 7 + assets/scalers/xbr.hlsl | 94 ++--- assets/scalers/xbrz.hlsl | 459 +++++++++++------------ libretro/libretro.cpp | 2 +- 16 files changed, 652 insertions(+), 304 deletions(-) create mode 100644 assets/scalers/4xbrz.hlsl diff --git a/GPU/Common/TextureScalerCommon.cpp b/GPU/Common/TextureScalerCommon.cpp index d01dd3e605a2..9ea91f1cce8d 100644 --- a/GPU/Common/TextureScalerCommon.cpp +++ b/GPU/Common/TextureScalerCommon.cpp @@ -558,6 +558,7 @@ bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, u32 &dstFmt, int & case XBR: case SABR: // no cpu implementation, fall back to xbrz + case _4XBRZ: case XBRZ: ScaleXBRZ(factor, inputBuf, 
outputBuf, width, height); break; diff --git a/GPU/Common/TextureScalerCommon.h b/GPU/Common/TextureScalerCommon.h index 8711f77b7cba..56470c2c42ed 100644 --- a/GPU/Common/TextureScalerCommon.h +++ b/GPU/Common/TextureScalerCommon.h @@ -31,7 +31,7 @@ class TextureScalerCommon { bool Scale(u32 *&data, u32 &dstfmt, int &width, int &height, int factor); bool ScaleInto(u32 *out, u32 *src, u32 &dstfmt, int &width, int &height, int factor); - enum { XBRZ = 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3, XBR = 4, SABR = 5, GAUSSIAN = 6, COSINE = 7 }; + enum { XBRZ = 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3, _4XBRZ = 4, XBR = 5, SABR = 6, GAUSSIAN = 7, COSINE = 8 }; protected: virtual void ConvertTo8888(u32 format, u32 *source, u32 *&dest, int width, int height) = 0; diff --git a/GPU/D3D11/ShaderManagerD3D11.cpp b/GPU/D3D11/ShaderManagerD3D11.cpp index 2ce967bf2065..6cfa216de0e2 100644 --- a/GPU/D3D11/ShaderManagerD3D11.cpp +++ b/GPU/D3D11/ShaderManagerD3D11.cpp @@ -219,6 +219,7 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader ** "scalers/hybrid.hlsl", "scalers/bicubic.hlsl", "scalers/hybrid_bicubic.hlsl", + "scalers/4xbrz.hlsl", "scalers/xbr.hlsl", "scalers/sabr.hlsl", "scalers/gaussian.hlsl", diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 6207ab22d850..093a2fd1317b 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -152,12 +152,7 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, char* scalerC if (!isModeClear && doTexture) { if (doTextureAlpha) { - // TODO: check why the [0.0,1.0] clamp is necessary here - WRITE(p, "float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); }"); - WRITE(p, "float4 postdivide_alpha(float4 c) { return c.a < 0.0001f? 
0.0f : float4(c.rgb / c.a, c.a); }\n"); - } else { - WRITE(p, "#define premultiply_alpha(c) (c)\n"); - WRITE(p, "#define postdivide_alpha(c) (c)\n"); + WRITE(p, "#define BLEND_ALPHA\n"); } WRITE(p, "float4 tex_sample_direct(float2 coord) {\n"); diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 5b297ff8a446..3b5d0fdc5644 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -378,7 +378,7 @@ void GameSettingsScreen::CreateViews() { }); texScalingChoice->SetDisabledPtr(&g_Config.bSoftwareRendering); - static const char *texScaleAlgos[] = { "xBRZ", "Hybrid", "Bicubic", "Hybrid + Bicubic", "XBR", "SABR", "Gaussian", "Cosine"}; + static const char *texScaleAlgos[] = { "xBRZ", "Hybrid", "Bicubic", "Hybrid + Bicubic", "4xBRZ", "xBR", "SABR", "Gaussian", "Cosine"}; PopupMultiChoice *texScalingType = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iTexScalingType, gr->T("Upscale Type"), texScaleAlgos, 0, ARRAY_SIZE(texScaleAlgos), gr->GetName(), screenManager())); texScalingType->SetDisabledPtr(&g_Config.bSoftwareRendering); diff --git a/Windows/MainWindowMenu.cpp b/Windows/MainWindowMenu.cpp index 73a4ef571cae..d46688cfcb25 100644 --- a/Windows/MainWindowMenu.cpp +++ b/Windows/MainWindowMenu.cpp @@ -346,6 +346,8 @@ namespace MainWindow { TranslateMenuItem(menu, ID_TEXTURESCALING_HYBRID); TranslateMenuItem(menu, ID_TEXTURESCALING_BICUBIC); TranslateMenuItem(menu, ID_TEXTURESCALING_HYBRID_BICUBIC); + TranslateMenuItem(menu, ID_TEXTURESCALING_4XBRZ); + TranslateMenuItem(menu, ID_TEXTURESCALING_XBR); TranslateMenuItem(menu, ID_TEXTURESCALING_SABR); TranslateMenuItem(menu, ID_TEXTURESCALING_GAUSSIAN); TranslateMenuItem(menu, ID_TEXTURESCALING_COSINE); @@ -787,6 +789,7 @@ namespace MainWindow { case ID_TEXTURESCALING_HYBRID: setTexScalingType(TextureScalerCommon::HYBRID); break; case ID_TEXTURESCALING_BICUBIC: setTexScalingType(TextureScalerCommon::BICUBIC); break; case ID_TEXTURESCALING_HYBRID_BICUBIC: 
setTexScalingType(TextureScalerCommon::HYBRID_BICUBIC); break; + case ID_TEXTURESCALING_4XBRZ: setTexScalingType(TextureScalerCommon::_4XBRZ); break; case ID_TEXTURESCALING_XBR: setTexScalingType(TextureScalerCommon::XBR); break; case ID_TEXTURESCALING_SABR: setTexScalingType(TextureScalerCommon::SABR); break; case ID_TEXTURESCALING_GAUSSIAN: setTexScalingType(TextureScalerCommon::GAUSSIAN); break; @@ -1215,6 +1218,7 @@ namespace MainWindow { ID_TEXTURESCALING_HYBRID, ID_TEXTURESCALING_BICUBIC, ID_TEXTURESCALING_HYBRID_BICUBIC, + ID_TEXTURESCALING_4XBRZ, ID_TEXTURESCALING_XBR, ID_TEXTURESCALING_SABR, ID_TEXTURESCALING_GAUSSIAN, diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 47a74e494e17..81006823d43d 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -613,7 +613,8 @@ BEGIN MENUITEM "Hybrid", ID_TEXTURESCALING_HYBRID MENUITEM "Bicubic", ID_TEXTURESCALING_BICUBIC MENUITEM "Hybrid + Bicubic", ID_TEXTURESCALING_HYBRID_BICUBIC - MENUITEM "XBR", ID_TEXTURESCALING_XBR + MENUITEM "4xBRZ", ID_TEXTURESCALING_4XBRZ + MENUITEM "xBR", ID_TEXTURESCALING_XBR MENUITEM "SABR", ID_TEXTURESCALING_SABR MENUITEM "Gaussian", ID_TEXTURESCALING_GAUSSIAN MENUITEM "Cosine", ID_TEXTURESCALING_COSINE diff --git a/Windows/resource.h b/Windows/resource.h index 9a8c9f763d8e..ba0fbbfc4dcb 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -342,10 +342,11 @@ #define ID_TEXTURESCALING_REALTIME 40177 #define ID_TEXTURESCALING_REALTIME_HC 40178 -#define ID_TEXTURESCALING_XBR 40179 -#define ID_TEXTURESCALING_SABR 40180 -#define ID_TEXTURESCALING_GAUSSIAN 40181 -#define ID_TEXTURESCALING_COSINE 40182 +#define ID_TEXTURESCALING_4XBRZ 40179 +#define ID_TEXTURESCALING_XBR 40180 +#define ID_TEXTURESCALING_SABR 40181 +#define ID_TEXTURESCALING_GAUSSIAN 40182 +#define ID_TEXTURESCALING_COSINE 40183 // Dummy option to let the buffered rendering hotkey cycle through all the options. 
#define ID_OPTIONS_BUFFEREDRENDERINGDUMMY 40500 diff --git a/assets/scalers/4xbrz.hlsl b/assets/scalers/4xbrz.hlsl new file mode 100644 index 000000000000..6c54b05d53d1 --- /dev/null +++ b/assets/scalers/4xbrz.hlsl @@ -0,0 +1,343 @@ + +// 4xBRZ shader - Copyright (C) 2014-2016 DeSmuME team +// +// This file is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with the this software. If not, see <http://www.gnu.org/licenses/>. + + +/* + Hyllian's xBR-vertex code and texel mapping + + Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif + +#define BLEND_NONE 0 +#define BLEND_NORMAL 1 +#define BLEND_DOMINANT 2 +#define LUMINANCE_WEIGHT 1.0 +#define EQUAL_COLOR_TOLERANCE 30.0/255.0 +#define STEEP_DIRECTION_THRESHOLD 2.2 +#define DOMINANT_DIRECTION_THRESHOLD 3.6 + +float reduce(float4 color) +{ + return dot(color.rgb, float3(65536.0, 256.0, 1.0)); +} + +float DistYCbCr(float4 pixA, float4 pixB) +{ + const float3 w = float3(0.2627, 0.6780, 0.0593); + const float scaleB = 0.5 / (1.0 - w.b); + const float scaleR = 0.5 / (1.0 - w.r); + float4 diff = pixA - pixB; + float Y = dot(diff.rgb, w); + float Cb = scaleB * (diff.b - Y); + float Cr = scaleR * (diff.r - Y); + + return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) + (diff.a * diff.a)); +} + +bool IsPixEqual(const float4 pixA, const float4 pixB) +{ + return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); +} + +bool IsBlendingNeeded(const int4 blend) +{ + return any(blend - BLEND_NONE); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 20|21|22|23|24|25 +// 19|06|07|08|09|26 +// 18|05|00|01|10|27 +// 17|04|03|02|11|28 +// 16|15|14|13|12|29 +// 35|34|33|32|31|30 + +float4 tex_sample(float2 coord) +{ + float dx = u_texSize.z; + float dy = u_texSize.w; + + // A1 B1 C1 + // A0 A B C C4 + // D0 D E F 
F4 + // G0 G H I I4 + // G5 H5 I5 + + float4 t1 = coord.xxxy + float4( -dx, 0.0, dx,-2.0*dy); // A1 B1 C1 + float4 t2 = coord.xxxy + float4( -dx, 0.0, dx, -dy); // A B C + float4 t3 = coord.xxxy + float4( -dx, 0.0, dx, 0.0); // D E F + float4 t4 = coord.xxxy + float4( -dx, 0.0, dx, dy); // G H I + float4 t5 = coord.xxxy + float4( -dx, 0.0, dx, 2.0*dy); // G5 H5 I5 + float4 t6 = coord.xyyy + float4(-2.0*dx,-dy, 0.0, dy); // A0 D0 G0 + float4 t7 = coord.xyyy + float4( 2.0*dx,-dy, 0.0, dy); // C4 F4 I4 + + float2 f = frac(coord.xy * u_texSize.xy); + + //--------------------------------------- + // Input Pixel Mapping: |21|22|23| + // 19|06|07|08|09 + // 18|05|00|01|10 + // 17|04|03|02|11 + // |15|14|13| + + float4 src[25]; + + src[21] = premultiply_alpha(tex_sample_direct(t1.xw)); + src[22] = premultiply_alpha(tex_sample_direct(t1.yw)); + src[23] = premultiply_alpha(tex_sample_direct(t1.zw)); + src[ 6] = premultiply_alpha(tex_sample_direct(t2.xw)); + src[ 7] = premultiply_alpha(tex_sample_direct(t2.yw)); + src[ 8] = premultiply_alpha(tex_sample_direct(t2.zw)); + src[ 5] = premultiply_alpha(tex_sample_direct(t3.xw)); + src[ 0] = premultiply_alpha(tex_sample_direct(t3.yw)); + src[ 1] = premultiply_alpha(tex_sample_direct(t3.zw)); + src[ 4] = premultiply_alpha(tex_sample_direct(t4.xw)); + src[ 3] = premultiply_alpha(tex_sample_direct(t4.yw)); + src[ 2] = premultiply_alpha(tex_sample_direct(t4.zw)); + src[15] = premultiply_alpha(tex_sample_direct(t5.xw)); + src[14] = premultiply_alpha(tex_sample_direct(t5.yw)); + src[13] = premultiply_alpha(tex_sample_direct(t5.zw)); + src[19] = premultiply_alpha(tex_sample_direct(t6.xy)); + src[18] = premultiply_alpha(tex_sample_direct(t6.xz)); + src[17] = premultiply_alpha(tex_sample_direct(t6.xw)); + src[ 9] = premultiply_alpha(tex_sample_direct(t7.xy)); + src[10] = premultiply_alpha(tex_sample_direct(t7.xz)); + src[11] = premultiply_alpha(tex_sample_direct(t7.xw)); + + float v[9]; + v[0] = reduce(src[0]); + v[1] = reduce(src[1]); + 
v[2] = reduce(src[2]); + v[3] = reduce(src[3]); + v[4] = reduce(src[4]); + v[5] = reduce(src[5]); + v[6] = reduce(src[6]); + v[7] = reduce(src[7]); + v[8] = reduce(src[8]); + + int4 blendResult = BLEND_NONE; + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + // Corner (1, 1) + if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) + { + float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) + { + float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) + { + float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) + { + float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float4 dst[16]; + dst[ 0] = src[0]; + dst[ 1] = src[0]; + dst[ 2] = src[0]; + dst[ 3] = src[0]; + dst[ 4] = src[0]; + dst[ 5] = src[0]; + dst[ 6] = src[0]; + dst[ 7] = src[0]; + dst[ 8] = src[0]; + dst[ 9] = src[0]; + dst[10] = src[0]; + dst[11] = src[0]; + dst[12] = src[0]; + dst[13] = src[0]; + dst[14] = src[0]; + dst[15] = src[0]; + + // Scale pixel + if (IsBlendingNeeded(blendResult) == true) + { + float dist_01_04 = DistYCbCr(src[1], src[4]); + float dist_03_08 = DistYCbCr(src[3], src[8]); + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); + bool needBlend = (blendResult[2] != BLEND_NONE); + bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || + ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) == false) ) == false ); + + float4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + dst[ 2] = lerp(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[10] = lerp(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[11] = lerp(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[13] = lerp(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 
1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[14] = lerp(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[7], src[2]); + dist_03_08 = DistYCbCr(src[1], src[6]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); + needBlend = (blendResult[1] != BLEND_NONE); + doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + + blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + dst[ 1] = lerp(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 7] = lerp(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 8] = lerp(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[10] = lerp(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[11] = lerp(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[5], src[8]); + dist_03_08 = DistYCbCr(src[7], src[4]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); + needBlend = (blendResult[0] != BLEND_NONE); + doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + + blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; + dst[ 0] = lerp(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 4] = lerp(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 5] = lerp(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 7] = lerp(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 8] = lerp(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + + + dist_01_04 = DistYCbCr(src[3], src[6]); + dist_03_08 = DistYCbCr(src[5], src[2]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); + needBlend = (blendResult[3] != BLEND_NONE); + doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + + blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + dst[ 3] = lerp(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[13] = lerp(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[14] = lerp(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[15] = lerp(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 4] = lerp(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 5] = lerp(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + } + + // select output pixel + float4 res = lerp(lerp(lerp(lerp(dst[ 6], dst[ 7], step(0.25, f.x)), + lerp(dst[ 8], dst[ 9], step(0.75, f.x)), + step(0.50, f.x)), + lerp(lerp(dst[ 5], dst[ 0], step(0.25, f.x)), + lerp(dst[ 1], dst[10], step(0.75, f.x)), + step(0.50, f.x)), + step(0.25, f.y)), + lerp(lerp(lerp(dst[ 4], dst[ 3], step(0.25, f.x)), + lerp(dst[ 2], dst[11], step(0.75, f.x)), + step(0.50, f.x)), + lerp(lerp(dst[15], dst[14], step(0.25, f.x)), + lerp(dst[13], dst[12], step(0.75, f.x)), + step(0.50, f.x)), + step(0.75, f.y)), + step(0.50, f.y)); + + return postdivide_alpha(res); +}; diff --git a/assets/scalers/bicubic.hlsl b/assets/scalers/bicubic.hlsl index 8a20886e9d04..4848632e1048 100644 --- a/assets/scalers/bicubic.hlsl +++ b/assets/scalers/bicubic.hlsl @@ -1,4 +1,12 @@ +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif + // generate the value of a Mitchell-Netravali scaling spline at distance d, with parameters A and B // B=1 C=0 : cubic B spline (very smooth) // B=C=1/3 : recommended for general upscaling diff --git a/assets/scalers/cosine.hlsl b/assets/scalers/cosine.hlsl index 2481529f940a..a3fd5dd1cfff 100644 --- a/assets/scalers/cosine.hlsl +++ b/assets/scalers/cosine.hlsl @@ -1,3 +1,10 @@ +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? 
float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif #define sharpness 1.0 #define pi 3.14159265358 diff --git a/assets/scalers/gaussian.hlsl b/assets/scalers/gaussian.hlsl index f57cd4ddcf58..b01982b10b2a 100644 --- a/assets/scalers/gaussian.hlsl +++ b/assets/scalers/gaussian.hlsl @@ -1,3 +1,10 @@ +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif #define sharpness 1.0 #define pi 3.14159265358 diff --git a/assets/scalers/sabr.hlsl b/assets/scalers/sabr.hlsl index 05d0222e694e..a62e0c43a239 100644 --- a/assets/scalers/sabr.hlsl +++ b/assets/scalers/sabr.hlsl @@ -1,3 +1,10 @@ +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif float c_df(float4 c1, float4 c2) { float3 df = abs(c1.rgb - c2.rgb); diff --git a/assets/scalers/xbr.hlsl b/assets/scalers/xbr.hlsl index 546831f36c1a..6dc9a85ac68f 100644 --- a/assets/scalers/xbr.hlsl +++ b/assets/scalers/xbr.hlsl @@ -25,6 +25,14 @@ Incorporates some of the ideas from SABR shader. Thanks to Joshua Street. */ +#ifdef BLEND_ALPHA +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#endif + #define XBR_Y_WEIGHT 48.0 // 0.0 .. 100.0 #define XBR_EQ_THRESHOLD 15.0 // 0.0 .. 
50.0 #define XBR_LV1_COEFFICIENT 0.5 // 0.0 .. 30.0 @@ -35,7 +43,7 @@ #define XBR_SCALE 4.0 static const float coef = 2.0; -static const float3 rgbw = float3(14.352, 28.176, 5.472); +static const float4 rgbw = float4(14.352, 28.176, 5.472, 50); static const float4 eq_threshold = float4(15.0, 15.0, 15.0, 15.0); static const float4 delta = float4(1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE); @@ -53,7 +61,7 @@ static const float4 By = float4( 2.0, 0.5, -2.0,-0.5 ); static const float4 Cy = float4( 2.0, 0.0, -1.0, 0.5 ); static const float4 Ci = float4(0.25, 0.25, 0.25, 0.25); -static const float3 Y = float3(0.2126, 0.7152, 0.0722); +static const float4 Y = float4(0.2126, 0.7152, 0.0722, 1.0); // Difference between vector components. float4 df(float4 A, float4 B) { return abs(A-B); } @@ -76,9 +84,9 @@ float4 weighted_distance(float4 a, float4 b, float4 c, float4 d, float4 e, float return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h)); } -float c_df(float3 c1, float3 c2) { - float3 df = abs(c1 - c2); - return df.r + df.g + df.b; +float c_df(float4 c1, float4 c2) { + float4 df = abs(c1 - c2); + return df.r + df.g + df.b + df.a; } float4 tex_sample(float2 coord) { @@ -98,32 +106,32 @@ float4 tex_sample(float2 coord) { float2 fp = frac(coord*u_texSize.xy); - float4 A1 = premultiply_alpha(tex_sample_direct(t1.xw)); - float4 B1 = premultiply_alpha(tex_sample_direct(t1.yw)); - float4 C1 = premultiply_alpha(tex_sample_direct(t1.zw)); - float4 A = premultiply_alpha(tex_sample_direct(t2.xw)); - float4 B = premultiply_alpha(tex_sample_direct(t2.yw)); - float4 C = premultiply_alpha(tex_sample_direct(t2.zw)); - float4 D = premultiply_alpha(tex_sample_direct(t3.xw)); - float4 E = premultiply_alpha(tex_sample_direct(t3.yw)); - float4 F = premultiply_alpha(tex_sample_direct(t3.zw)); - float4 G = premultiply_alpha(tex_sample_direct(t4.xw)); - float4 H = premultiply_alpha(tex_sample_direct(t4.yw)); - float4 I = 
premultiply_alpha(tex_sample_direct(t4.zw)); - float4 G5 = premultiply_alpha(tex_sample_direct(t5.xw)); - float4 H5 = premultiply_alpha(tex_sample_direct(t5.yw)); - float4 I5 = premultiply_alpha(tex_sample_direct(t5.zw)); - float4 A0 = premultiply_alpha(tex_sample_direct(t6.xy)); - float4 D0 = premultiply_alpha(tex_sample_direct(t6.xz)); - float4 G0 = premultiply_alpha(tex_sample_direct(t6.xw)); - float4 C4 = premultiply_alpha(tex_sample_direct(t7.xy)); - float4 F4 = premultiply_alpha(tex_sample_direct(t7.xz)); - float4 I4 = premultiply_alpha(tex_sample_direct(t7.xw)); - - float4 b = float4(dot(B.rgb ,rgbw), dot(D.rgb ,rgbw), dot(H.rgb ,rgbw), dot(F.rgb ,rgbw)); - float4 c = float4(dot(C.rgb ,rgbw), dot(A.rgb ,rgbw), dot(G.rgb ,rgbw), dot(I.rgb ,rgbw)); + float4 A1 = tex_sample_direct(t1.xw); + float4 B1 = tex_sample_direct(t1.yw); + float4 C1 = tex_sample_direct(t1.zw); + float4 A = tex_sample_direct(t2.xw); + float4 B = tex_sample_direct(t2.yw); + float4 C = tex_sample_direct(t2.zw); + float4 D = tex_sample_direct(t3.xw); + float4 E = tex_sample_direct(t3.yw); + float4 F = tex_sample_direct(t3.zw); + float4 G = tex_sample_direct(t4.xw); + float4 H = tex_sample_direct(t4.yw); + float4 I = tex_sample_direct(t4.zw); + float4 G5 = tex_sample_direct(t5.xw); + float4 H5 = tex_sample_direct(t5.yw); + float4 I5 = tex_sample_direct(t5.zw); + float4 A0 = tex_sample_direct(t6.xy); + float4 D0 = tex_sample_direct(t6.xz); + float4 G0 = tex_sample_direct(t6.xw); + float4 C4 = tex_sample_direct(t7.xy); + float4 F4 = tex_sample_direct(t7.xz); + float4 I4 = tex_sample_direct(t7.xw); + + float4 b = float4(dot(B, rgbw), dot(D, rgbw), dot(H, rgbw), dot(F, rgbw)); + float4 c = float4(dot(C, rgbw), dot(A, rgbw), dot(G, rgbw), dot(I, rgbw)); float4 d = b.yzwx; - float4 e = dot(E.rgb,rgbw); + float4 e = dot(E, rgbw); float4 f = b.wxyz; float4 g = c.zwxy; float4 h = b.zwxy; @@ -133,13 +141,13 @@ float4 tex_sample(float2 coord) { float y_weight = XBR_Y_WEIGHT; #ifdef SMALL_DETAILS - i4 = 
mul(float4x3(I4.rgb, C1.rgb, A0.rgb, G5.rgb), y_weight * Y); - i5 = mul(float4x3(I5.rgb, C4.rgb, A1.rgb, G0.rgb), y_weight * Y); - h5 = mul(float4x3(H5.rgb, F4.rgb, B1.rgb, D0.rgb), y_weight * Y); + i4 = mul(float4x4(I4, C1, A0, G5), y_weight * Y); + i5 = mul(float4x4(I5, C4, A1, G0), y_weight * Y); + h5 = mul(float4x4(H5, F4, B1, D0), y_weight * Y); #else - i4 = float4(dot(I4.rgb,rgbw), dot(C1.rgb,rgbw), dot(A0.rgb,rgbw), dot(G5.rgb,rgbw)); - i5 = float4(dot(I5.rgb,rgbw), dot(C4.rgb,rgbw), dot(A1.rgb,rgbw), dot(G0.rgb,rgbw)); - h5 = float4(dot(H5.rgb,rgbw), dot(F4.rgb,rgbw), dot(B1.rgb,rgbw), dot(D0.rgb,rgbw)); + i4 = float4(dot(I4,rgbw), dot(C1,rgbw), dot(A0,rgbw), dot(G5,rgbw)); + i5 = float4(dot(I5,rgbw), dot(C4,rgbw), dot(A1,rgbw), dot(G0,rgbw)); + h5 = float4(dot(H5,rgbw), dot(F4,rgbw), dot(B1,rgbw), dot(D0,rgbw)); #endif f4 = h5.yzwx; @@ -197,14 +205,14 @@ float4 tex_sample(float2 coord) { float4 maximos = max(max(fx30, fx60), fx45); #endif - float4 res1 = E; - res1 = lerp(res1, lerp(H, F, px.x), maximos.x); - res1 = lerp(res1, lerp(B, D, px.z), maximos.z); + float4 res1 = premultiply_alpha(E); + res1 = lerp(res1, premultiply_alpha(lerp(H, F, px.x)), maximos.x); + res1 = lerp(res1, premultiply_alpha(lerp(B, D, px.z)), maximos.z); - float4 res2 = E; - res2 = lerp(res2, lerp(F, B, px.y), maximos.y); - res2 = lerp(res2, lerp(D, H, px.w), maximos.w); + float4 res2 = premultiply_alpha(E); + res2 = lerp(res2, premultiply_alpha(lerp(F, B, px.y)), maximos.y); + res2 = lerp(res2, premultiply_alpha(lerp(D, H, px.w)), maximos.w); - float4 res = lerp(res1, res2, step(c_df(E.rgb, res1.rgb), c_df(E.rgb, res2.rgb))); + float4 res = lerp(res1, res2, step(c_df(E, res1), c_df(E, res2))); return postdivide_alpha(res); } diff --git a/assets/scalers/xbrz.hlsl b/assets/scalers/xbrz.hlsl index 43745eb453ca..4031722b8587 100644 --- a/assets/scalers/xbrz.hlsl +++ b/assets/scalers/xbrz.hlsl @@ -1,3 +1,6 @@ +// xBRZ-freescale +// based on : + // 4xBRZ shader - Copyright (C) 2014-2016 
DeSmuME team // @@ -46,23 +49,38 @@ #define EQUAL_COLOR_TOLERANCE 30.0/255.0 #define STEEP_DIRECTION_THRESHOLD 2.2 #define DOMINANT_DIRECTION_THRESHOLD 3.6 - -float reduce(float4 color) -{ - return dot(color.rgb, float3(65536.0, 256.0, 1.0)); -} +#define BLEND_ALPHA + +#ifdef BLEND_ALPHA +// TODO: check why the [0.0,1.0] clamp is necessary here +float4 premultiply_alpha(float4 c) { float a = clamp(c.a, 0.0, 1.0); return float4(c.rgb * a, a); } +float4 postdivide_alpha(float4 c) { return c.a < 0.001f? float4(0.0f,0.0f,0.0f,0.0f) : float4(c.rgb / c.a, c.a); } +#define eq(a,b) all(a == b) +#define neq(a,b) any(a != b) +#else +#define premultiply_alpha(c) (c) +#define postdivide_alpha(c) (c) +#define eq(a,b) all(a.rgb == b.rgb) +#define neq(a,b) any(a.rgb != b.rgb) +#endif + +#define P(x,y) tex_sample_direct(coord + u_texSize.zw * float2(x, y)) float DistYCbCr(float4 pixA, float4 pixB) { const float3 w = float3(0.2627, 0.6780, 0.0593); const float scaleB = 0.5 / (1.0 - w.b); const float scaleR = 0.5 / (1.0 - w.r); - float3 diff = pixA.rgb - pixB.rgb; - float Y = dot(diff, w); + float4 diff = pixA - pixB; + float Y = dot(diff.rgb, w); float Cb = scaleB * (diff.b - Y); float Cr = scaleR * (diff.r - Y); - return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) ); +#ifdef BLEND_ALPHA + return sqrt(((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) + (diff.a * diff.a)); +#else + return sqrt(((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr)); +#endif } bool IsPixEqual(const float4 pixA, const float4 pixB) @@ -70,266 +88,213 @@ bool IsPixEqual(const float4 pixA, const float4 pixB) return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); } -bool IsBlendingNeeded(const int4 blend) +float get_left_ratio(float2 center, float2 origin, float2 direction, float2 scale) { - return any(blend - BLEND_NONE); + float2 P0 = center - origin; + float2 proj = direction * (dot(P0, direction) / dot(direction, 
direction)); + float2 distv = P0 - proj; + float2 orth = float2(-direction.y, direction.x); + float side = sign(dot(P0, orth)); + float v = side * length(distv * scale); + +// return step(0, v); + return smoothstep(-sqrt(2.0)/2.0, sqrt(2.0)/2.0, v); } -//--------------------------------------- -// Input Pixel Mapping: --|21|22|23|-- -// 19|06|07|08|09 -// 18|05|00|01|10 -// 17|04|03|02|11 -// --|15|14|13|-- -// -// Output Pixel Mapping: 20|21|22|23|24|25 -// 19|06|07|08|09|26 -// 18|05|00|01|10|27 -// 17|04|03|02|11|28 -// 16|15|14|13|12|29 -// 35|34|33|32|31|30 - -float4 tex_sample(float2 coord) -{ - float dx = u_texSize.z; - float dy = u_texSize.w; - - // A1 B1 C1 - // A0 A B C C4 - // D0 D E F F4 - // G0 G H I I4 - // G5 H5 I5 +float4 tex_sample(float2 coord) { + //--------------------------------------- + // Input Pixel Mapping: -|x|x|x|- + // x|A|B|C|x + // x|D|E|F|x + // x|G|H|I|x + // -|x|x|x|- + + float2 scale = u_texSize.zw / float2(ddx(coord.x), ddy(coord.y)); + float2 pos = frac(coord * u_texSize.xy) - float2(0.5, 0.5); + float4 A = P(-1,-1); + float4 B = P( 0,-1); + float4 C = P( 1,-1); + float4 D = P(-1, 0); + float4 E = P( 0, 0); + float4 F = P( 1, 0); + float4 G = P(-1, 1); + float4 H = P( 0, 1); + float4 I = P( 1, 1); + + // blendResult Mapping: x|y| + // w|z| + int4 blendResult = int4(BLEND_NONE,BLEND_NONE,BLEND_NONE,BLEND_NONE); - float4 t1 = coord.xxxy + float4( -dx, 0.0, dx,-2.0*dy); // A1 B1 C1 - float4 t2 = coord.xxxy + float4( -dx, 0.0, dx, -dy); // A B C - float4 t3 = coord.xxxy + float4( -dx, 0.0, dx, 0.0); // D E F - float4 t4 = coord.xxxy + float4( -dx, 0.0, dx, dy); // G H I - float4 t5 = coord.xxxy + float4( -dx, 0.0, dx, 2.0*dy); // G5 H5 I5 - float4 t6 = coord.xyyy + float4(-2.0*dx,-dy, 0.0, dy); // A0 D0 G0 - float4 t7 = coord.xyyy + float4( 2.0*dx,-dy, 0.0, dy); // C4 F4 I4 + // Preprocess corners + // Pixel Tap Mapping: -|-|-|-|- + // -|-|B|C|- + // -|D|E|F|x + // -|G|H|I|x + // -|-|x|x|- + if (!((eq(E,F) && eq(H,I)) || (eq(E,H) && 
eq(F,I)))) + { + float dist_H_F = DistYCbCr(G, E) + DistYCbCr(E, C) + DistYCbCr(P(0,2), I) + DistYCbCr(I, P(2,0)) + (4.0 * DistYCbCr(H, F)); + float dist_E_I = DistYCbCr(D, H) + DistYCbCr(H, P(1,2)) + DistYCbCr(B, F) + DistYCbCr(F, P(2,1)) + (4.0 * DistYCbCr(E, I)); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_H_F) < dist_E_I; + blendResult.z = ((dist_H_F < dist_E_I) && neq(E,F) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } - float2 f = frac(coord.xy * u_texSize.xy); - //--------------------------------------- - // Input Pixel Mapping: |21|22|23| - // 19|06|07|08|09 - // 18|05|00|01|10 - // 17|04|03|02|11 - // |15|14|13| - - float4 src[25]; - - src[21] = premultiply_alpha(tex_sample_direct(t1.xw)); - src[22] = premultiply_alpha(tex_sample_direct(t1.yw)); - src[23] = premultiply_alpha(tex_sample_direct(t1.zw)); - src[ 6] = premultiply_alpha(tex_sample_direct(t2.xw)); - src[ 7] = premultiply_alpha(tex_sample_direct(t2.yw)); - src[ 8] = premultiply_alpha(tex_sample_direct(t2.zw)); - src[ 5] = premultiply_alpha(tex_sample_direct(t3.xw)); - src[ 0] = premultiply_alpha(tex_sample_direct(t3.yw)); - src[ 1] = premultiply_alpha(tex_sample_direct(t3.zw)); - src[ 4] = premultiply_alpha(tex_sample_direct(t4.xw)); - src[ 3] = premultiply_alpha(tex_sample_direct(t4.yw)); - src[ 2] = premultiply_alpha(tex_sample_direct(t4.zw)); - src[15] = premultiply_alpha(tex_sample_direct(t5.xw)); - src[14] = premultiply_alpha(tex_sample_direct(t5.yw)); - src[13] = premultiply_alpha(tex_sample_direct(t5.zw)); - src[19] = premultiply_alpha(tex_sample_direct(t6.xy)); - src[18] = premultiply_alpha(tex_sample_direct(t6.xz)); - src[17] = premultiply_alpha(tex_sample_direct(t6.xw)); - src[ 9] = premultiply_alpha(tex_sample_direct(t7.xy)); - src[10] = premultiply_alpha(tex_sample_direct(t7.xz)); - src[11] = premultiply_alpha(tex_sample_direct(t7.xw)); - - float v[9]; - v[0] = reduce(src[0]); - v[1] = reduce(src[1]); - v[2] = reduce(src[2]); - 
v[3] = reduce(src[3]); - v[4] = reduce(src[4]); - v[5] = reduce(src[5]); - v[6] = reduce(src[6]); - v[7] = reduce(src[7]); - v[8] = reduce(src[8]); - - int4 blendResult = BLEND_NONE; + // Pixel Tap Mapping: -|-|-|-|- + // -|A|B|-|- + // x|D|E|F|- + // x|G|H|I|- + // -|x|x|-|- + if (!((eq(D,E) && eq(G,H)) || (eq(D,G) && eq(E,H)))) + { + float dist_G_E = DistYCbCr(P(-2,1) , D) + DistYCbCr(D, B) + DistYCbCr(P(-1,2), H) + DistYCbCr(H, F) + (4.0 * DistYCbCr(G, E)); + float dist_D_H = DistYCbCr(P(-2,0) , G) + DistYCbCr(G, P(0,2)) + DistYCbCr(A, E) + DistYCbCr(E, I) + (4.0 * DistYCbCr(D, H)); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_H) < dist_G_E; + blendResult.w = ((dist_G_E > dist_D_H) && neq(E,D) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } - // Preprocess corners - // Pixel Tap Mapping: --|--|--|--|-- - // --|--|07|08|-- - // --|05|00|01|10 - // --|04|03|02|11 - // --|--|14|13|-- - // Corner (1, 1) - if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) + // Pixel Tap Mapping: -|-|x|x|- + // -|A|B|C|x + // -|D|E|F|x + // -|-|H|I|- + // -|-|-|-|- + if (!((eq(B,C) && eq(E,F)) || (eq(B,E) && eq(C,F)))) { - float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); - float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; - blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + float dist_E_C = DistYCbCr(D, B) + DistYCbCr(B, P(1,-2)) + DistYCbCr(H, F) + DistYCbCr(F, P(2,-1)) + (4.0 * DistYCbCr(E, C)); + float dist_B_F = DistYCbCr(A, E) + DistYCbCr(E, I) + DistYCbCr(P(0,-2), C) + DistYCbCr(C, P(2,0)) + (4.0 * DistYCbCr(B, F)); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_B_F) < dist_E_C; + blendResult.y = ((dist_E_C > dist_B_F) && neq(E,B) && neq(E,F)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; } - // Pixel Tap Mapping: --|--|--|--|-- - // --|06|07|--|-- - // 18|05|00|01|-- - // 17|04|03|02|-- - // --|15|14|--|-- - // Corner (0, 1) - if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) + // Pixel Tap Mapping: -|x|x|-|- + // x|A|B|C|- + // x|D|E|F|- + // -|G|H|-|- + // -|-|-|-|- + if (!((eq(A,B) && eq(D,E)) || (eq(A,D) && eq(B,E)))) { - float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); - float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; - blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + float dist_D_B = DistYCbCr(P(-2,0), A) + DistYCbCr(A, P(0,-2)) + DistYCbCr(G, E) + DistYCbCr(E, C) + (4.0 * DistYCbCr(D, B)); + float dist_A_E = DistYCbCr(P(-2,-1), D) + DistYCbCr(D, H) + DistYCbCr(P(-1,-2), B) + DistYCbCr(B, F) + (4.0 * DistYCbCr(A, E)); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_B) < dist_A_E; + blendResult.x = ((dist_D_B < dist_A_E) && neq(E,D) && neq(E,B)) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; } - // Pixel Tap Mapping: --|--|22|23|-- - // --|06|07|08|09 - // --|05|00|01|10 - // --|--|03|02|-- - // --|--|--|--|-- - // Corner (1, 0) - if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) + float4 res = premultiply_alpha(E); + + // Pixel Tap Mapping: -|-|-|-|- + // -|-|B|C|- + // -|D|E|F|x + // -|G|H|I|x + // -|-|x|x|- + if(blendResult.z != BLEND_NONE) { - float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); - float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; - blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + float dist_F_G = DistYCbCr(F, G); + float dist_H_C = DistYCbCr(H, C); + bool doLineBlend = (blendResult.z == BLEND_DOMINANT || + !((blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || (blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || + (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I)))); + + float2 origin = float2(0.0, 1.0 / sqrt(2.0)); + float2 direction = float2(1.0, -1.0); + if(doLineBlend) + { + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && neq(E,G) && neq(D,G); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && neq(E,C) && neq(B,C); + origin = haveShallowLine? float2(0.0, 0.25) : float2(0.0, 0.5); + direction.x += haveShallowLine? 1.0: 0.0; + direction.y -= haveSteepLine? 
1.0: 0.0; + } + + float4 blendPix = premultiply_alpha(lerp(H,F, step(DistYCbCr(E, F), DistYCbCr(E, H)))); + res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale)); } - // Pixel Tap Mapping: --|21|22|--|-- - // 19|06|07|08|-- - // 18|05|00|01|-- - // --|04|03|--|-- - // --|--|--|--|-- - // Corner (0, 0) - if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) + // Pixel Tap Mapping: -|-|-|-|- + // -|A|B|-|- + // x|D|E|F|- + // x|G|H|I|- + // -|x|x|-|- + if(blendResult.w != BLEND_NONE) { - float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); - float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; - blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + float dist_H_A = DistYCbCr(H, A); + float dist_D_I = DistYCbCr(D, I); + bool doLineBlend = (blendResult.w == BLEND_DOMINANT || + !((blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || (blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || + (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G)))); + + float2 origin = float2(-1.0 / sqrt(2.0), 0.0); + float2 direction = float2(1.0, 1.0); + if(doLineBlend) + { + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && neq(E,A) && neq(B,A); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && neq(E,I) && neq(F,I); + origin = haveShallowLine? float2(-0.25, 0.0) : float2(-0.5, 0.0); + direction.y += haveShallowLine? 1.0: 0.0; + direction.x += haveSteepLine? 
1.0: 0.0; + } + origin = origin; + direction = direction; + + float4 blendPix = premultiply_alpha(lerp(H,D, step(DistYCbCr(E, D), DistYCbCr(E, H)))); + res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale)); } - float4 dst[16]; - dst[ 0] = src[0]; - dst[ 1] = src[0]; - dst[ 2] = src[0]; - dst[ 3] = src[0]; - dst[ 4] = src[0]; - dst[ 5] = src[0]; - dst[ 6] = src[0]; - dst[ 7] = src[0]; - dst[ 8] = src[0]; - dst[ 9] = src[0]; - dst[10] = src[0]; - dst[11] = src[0]; - dst[12] = src[0]; - dst[13] = src[0]; - dst[14] = src[0]; - dst[15] = src[0]; - - // Scale pixel - if (IsBlendingNeeded(blendResult) == true) + // Pixel Tap Mapping: -|-|x|x|- + // -|A|B|C|x + // -|D|E|F|x + // -|-|H|I|- + // -|-|-|-|- + if(blendResult.y != BLEND_NONE) { - float dist_01_04 = DistYCbCr(src[1], src[4]); - float dist_03_08 = DistYCbCr(src[3], src[8]); - bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); - bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); - bool needBlend = (blendResult[2] != BLEND_NONE); - bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || - ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) == false) ) == false ); - - float4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; - dst[ 2] = lerp(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[10] = lerp(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 
0.75 : 0.00); - dst[11] = lerp(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[12] = lerp(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[13] = lerp(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[14] = lerp(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[7], src[2]); - dist_03_08 = DistYCbCr(src[1], src[6]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); - needBlend = (blendResult[1] != BLEND_NONE); - doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || - !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); - - blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; - dst[ 1] = lerp(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 7] = lerp(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[ 8] = lerp(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 9] = lerp(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 
1.00 : 0.6848532563) : 0.00); - dst[10] = lerp(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[11] = lerp(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[5], src[8]); - dist_03_08 = DistYCbCr(src[7], src[4]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); - needBlend = (blendResult[0] != BLEND_NONE); - doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || - !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); - - blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; - dst[ 0] = lerp(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[15] = lerp(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 4] = lerp(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[ 5] = lerp(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 6] = lerp(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[ 7] = lerp(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 
0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 8] = lerp(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 9] = lerp(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - - dist_01_04 = DistYCbCr(src[3], src[6]); - dist_03_08 = DistYCbCr(src[5], src[2]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); - needBlend = (blendResult[3] != BLEND_NONE); - doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || - !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); - - blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; - dst[ 3] = lerp(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[12] = lerp(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[13] = lerp(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[14] = lerp(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[15] = lerp(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[ 4] = lerp(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 5] = lerp(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 6] = lerp(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 
0.25 : 0.00); + float dist_B_I = DistYCbCr(B, I); + float dist_F_A = DistYCbCr(F, A); + bool doLineBlend = (blendResult.y == BLEND_DOMINANT || + !((blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || (blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || + (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C)))); + + float2 origin = float2(1.0 / sqrt(2.0), 0.0); + float2 direction = float2(-1.0, -1.0); + + if(doLineBlend) + { + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && neq(E,I) && neq(H,I); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && neq(E,A) && neq(D,A); + origin = haveShallowLine? float2(0.25, 0.0) : float2(0.5, 0.0); + direction.y -= haveShallowLine? 1.0: 0.0; + direction.x -= haveSteepLine? 1.0: 0.0; + } + + float4 blendPix = premultiply_alpha(lerp(F,B, step(DistYCbCr(E, B), DistYCbCr(E, F)))); + res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale)); } - // select output pixel - float4 res = lerp(lerp(lerp(lerp(dst[ 6], dst[ 7], step(0.25, f.x)), - lerp(dst[ 8], dst[ 9], step(0.75, f.x)), - step(0.50, f.x)), - lerp(lerp(dst[ 5], dst[ 0], step(0.25, f.x)), - lerp(dst[ 1], dst[10], step(0.75, f.x)), - step(0.50, f.x)), - step(0.25, f.y)), - lerp(lerp(lerp(dst[ 4], dst[ 3], step(0.25, f.x)), - lerp(dst[ 2], dst[11], step(0.75, f.x)), - step(0.50, f.x)), - lerp(lerp(dst[15], dst[14], step(0.25, f.x)), - lerp(dst[13], dst[12], step(0.75, f.x)), - step(0.50, f.x)), - step(0.75, f.y)), - step(0.50, f.y)); + // Pixel Tap Mapping: -|x|x|-|- + // x|A|B|C|- + // x|D|E|F|- + // -|G|H|-|- + // -|-|-|-|- + if(blendResult.x != BLEND_NONE) + { + float dist_D_C = DistYCbCr(D, C); + float dist_B_G = DistYCbCr(B, G); + bool doLineBlend = (blendResult.x == BLEND_DOMINANT || + !((blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || (blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || + (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && 
IsPixEqual(D, G) && !IsPixEqual(E, A)))); + + float2 origin = float2(0.0, -1.0 / sqrt(2.0)); + float2 direction = float2(-1.0, 1.0); + if(doLineBlend) + { + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && neq(E,C) && neq(F,C); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && neq(E,G) && neq(H,G); + origin = haveShallowLine? float2(0.0, -0.25) : float2(0.0, -0.5); + direction.x -= haveShallowLine? 1.0: 0.0; + direction.y += haveSteepLine? 1.0: 0.0; + } + + float4 blendPix = premultiply_alpha(lerp(D,B, step(DistYCbCr(E, B), DistYCbCr(E, D)))); + res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale)); + } return postdivide_alpha(res); -}; +} diff --git a/libretro/libretro.cpp b/libretro/libretro.cpp index 5332f805da64..a76e8a4c4461 100644 --- a/libretro/libretro.cpp +++ b/libretro/libretro.cpp @@ -175,7 +175,7 @@ static RetroOption ppsspp_button_preference("ppsspp_button_preference", "Co static RetroOption ppsspp_fast_memory("ppsspp_fast_memory", "Fast Memory (Speedhack)", true); static RetroOption ppsspp_block_transfer_gpu("ppsspp_block_transfer_gpu", "Block Transfer GPU", true); static RetroOption ppsspp_texture_scaling_level("ppsspp_texture_scaling_level", "Texture Scaling Level", { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 }, { "5", 5 }, { "0", 0 } }); -static RetroOption ppsspp_texture_scaling_type("ppsspp_texture_scaling_type", "Texture Scaling Type", { { "xbrz", TextureScalerCommon::XBRZ }, { "hybrid", TextureScalerCommon::HYBRID }, { "bicubic", TextureScalerCommon::BICUBIC }, { "hybrid_bicubic", TextureScalerCommon::HYBRID_BICUBIC }, { "XBR", TextureScalerCommon::XBR }, { "SABR", TextureScalerCommon::SABR }, { "gaussian", TextureScalerCommon::GAUSSIAN }, { "cosine", TextureScalerCommon::COSINE } }); +static RetroOption ppsspp_texture_scaling_type("ppsspp_texture_scaling_type", "Texture Scaling Type", { { "xbrz", TextureScalerCommon::XBRZ }, { "hybrid", TextureScalerCommon::HYBRID }, 
{ "bicubic", TextureScalerCommon::BICUBIC }, { "hybrid_bicubic", TextureScalerCommon::HYBRID_BICUBIC }, { "4xBRZ", TextureScalerCommon::_4XBRZ }, { "XBR", TextureScalerCommon::XBR }, { "SABR", TextureScalerCommon::SABR }, { "gaussian", TextureScalerCommon::GAUSSIAN }, { "cosine", TextureScalerCommon::COSINE } }); static RetroOption ppsspp_texture_scaling_realtime("ppsspp_texture_scaling_realtime", "Realtime Texture Scaling", false); static RetroOption ppsspp_texture_anisotropic_filtering("ppsspp_texture_anisotropic_filtering", "Anisotropic Filtering", { "off", "1x", "2x", "4x", "8x", "16x" }); static RetroOption ppsspp_texture_deposterize("ppsspp_texture_deposterize", "Texture Deposterize", false);