From c99b4b118ace81475debb0222bb0648bb9820743 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 28 Jan 2021 00:03:00 -0800 Subject: [PATCH 1/2] Vulkan: Add MMPX upscaling texture shader. See https://casual-effects.com/research/McGuire2021PixelArt/index.html --- assets/shaders/defaultshaders.ini | 5 ++ assets/shaders/tex_mmpx.csh | 117 ++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 assets/shaders/tex_mmpx.csh diff --git a/assets/shaders/defaultshaders.ini b/assets/shaders/defaultshaders.ini index 17b447b7c9b9..bde5df9d62ba 100644 --- a/assets/shaders/defaultshaders.ini +++ b/assets/shaders/defaultshaders.ini @@ -154,3 +154,8 @@ Type=Texture Name=4xBRZ Author=Hyllian Compute=tex_4xbrz.csh +[TexMMPX] +Type=Texture +Name=MMPX +Author=Morgan McGuire and Mara Gagiu +Compute=tex_mmpx.csh diff --git a/assets/shaders/tex_mmpx.csh b/assets/shaders/tex_mmpx.csh new file mode 100644 index 000000000000..1ba45d1ffe85 --- /dev/null +++ b/assets/shaders/tex_mmpx.csh @@ -0,0 +1,117 @@ +/* MMPX.glc + Copyright 2020 Morgan McGuire & Mara Gagiu. + Provided under the Open Source MIT license https://opensource.org/licenses/MIT + + See js-demo.html for the commented source code. + This is an optimized GLSL port of that version + by Morgan McGuire and Mara Gagiu. 
+*/ + +#define ABGR8 uint + +ABGR8 src(int x, int y) { + return readColoru(uvec2(clamp(x, 0, params.width - 1), clamp(y, 0, params.height - 1))); +} + +uint luma(ABGR8 C) { + uint alpha = (C & 0xFF000000u) >> 24; + return (((C & 0x00FF0000u) >> 16) + ((C & 0x0000FF00u) >> 8) + (C & 0x000000FFu) + 1u) * (256u - alpha); +} + +bool all_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) { + return ((B ^ A0) | (B ^ A1)) == 0u; +} + +bool all_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) { + return ((B ^ A0) | (B ^ A1) | (B ^ A2)) == 0u; +} + +bool all_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) { + return ((B ^ A0) | (B ^ A1) | (B ^ A2) | (B ^ A3)) == 0u; +} + +bool any_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) { + return B == A0 || B == A1 || B == A2; +} + +bool none_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) { + return (B != A0) && (B != A1); +} + +bool none_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) { + return B != A0 && B != A1 && B != A2 && B != A3; +} + +uint applyScalingu(uvec2 origxy, uvec2 xy) { + int srcX = int(origxy.x); + int srcY = int(origxy.y); + + ABGR8 A = src(srcX - 1, srcY - 1), B = src(srcX, srcY - 1), C = src(srcX + 1, srcY - 1); + ABGR8 D = src(srcX - 1, srcY + 0), E = src(srcX, srcY + 0), F = src(srcX + 1, srcY + 0); + ABGR8 G = src(srcX - 1, srcY + 1), H = src(srcX, srcY + 1), I = src(srcX + 1, srcY + 1); + + ABGR8 J = E, K = E, L = E, M = E; + + if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) { + ABGR8 P = src(srcX, srcY - 2), S = src(srcX, srcY + 2); + ABGR8 Q = src(srcX - 2, srcY), R = src(srcX + 2, srcY); + ABGR8 Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H); + + // 1:1 slope rules + if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D; + if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B; + if ((H == D && H != F && H != B) && (El 
>= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H; + if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F; + + // Intersection rules + if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(srcX + 3, srcY))) K = M = F; + if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(srcX - 3, srcY))) J = L = D; + if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(srcX, srcY + 3))) L = M = H; + if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(srcX, srcY - 3))) J = K = B; + if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B; + if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H; + if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F; + if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D; + + // 2:1 slope rules + if (H != B) { + if (H != A && H != E && H != C) { + if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M; + if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L; + } + + if (B != I && B != G && B != E) { + if (all_eq3(B, A, F, R) && none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K; + if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J; + } + } // H != B + + if (F != D) { + if (D != I && D != E && D != C) { + if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L; + if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J; + } + + if (F != E && F != A && F != G) { + if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M; + if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K; + } + } // F != D + } // 3x3 neighborhood around E is not constant + + // TODO: Write four pixels at once. For now, 1/4x speed: all of J, K, L and M are computed per call, but only the one selected by the parity of xy is returned. 
+ if ((xy.y & 1u) == 0u) { + if ((xy.x & 1u) == 0u) { + return J; + } + return K; + } + if ((xy.x & 1u) == 0u) { + return L; + } + return M; +} + +vec4 applyScalingf(uvec2 origxy, uvec2 xy) { + return unpackUnorm4x8(applyScalingu(origxy, xy)); +} From c630d365cdfd03147c2a158b28ffaedf0067a2d3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 28 Jan 2021 01:03:02 -0800 Subject: [PATCH 2/2] Vulkan: Allow tex shaders to specify a max scale. --- GPU/Common/PostShader.cpp | 1 + GPU/Common/PostShader.h | 1 + GPU/Vulkan/TextureCacheVulkan.cpp | 4 ++++ GPU/Vulkan/TextureCacheVulkan.h | 1 + 4 files changed, 7 insertions(+) diff --git a/GPU/Common/PostShader.cpp b/GPU/Common/PostShader.cpp index 9971650ba53e..e415aa92261f 100644 --- a/GPU/Common/PostShader.cpp +++ b/GPU/Common/PostShader.cpp @@ -161,6 +161,7 @@ void LoadPostShaderInfo(const std::vector &directories) { info.section = section.name(); section.Get("Name", &info.name, section.name().c_str()); section.Get("Compute", &temp, ""); + section.Get("MaxScale", &info.maxScale, 255); info.computeShaderFile = path + "/" + temp; appendTextureShader(info); diff --git a/GPU/Common/PostShader.h b/GPU/Common/PostShader.h index 42aa30d4bc16..b5c1f562f948 100644 --- a/GPU/Common/PostShader.h +++ b/GPU/Common/PostShader.h @@ -70,6 +70,7 @@ struct TextureShaderInfo { std::string name; std::string computeShaderFile; + int maxScale; bool operator == (const std::string &other) { return name == other; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 8563a8831021..e568869b100f 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -394,6 +394,7 @@ void TextureCacheVulkan::CompileScalingShader() { if (copyCS_ != VK_NULL_HANDLE) vulkan_->Delete().QueueDeleteShaderModule(copyCS_); textureShader_.clear(); + maxScaleFactor_ = 255; } else if (uploadCS_ || copyCS_) { // No need to recreate. 
return; @@ -417,6 +418,7 @@ void TextureCacheVulkan::CompileScalingShader() { _dbg_assert_msg_(copyCS_ != VK_NULL_HANDLE, "failed to compile copy shader"); textureShader_ = g_Config.sTextureShaderName; + maxScaleFactor_ = shaderInfo->maxScale; } void TextureCacheVulkan::ReleaseTexture(TexCacheEntry *entry, bool delete_them) { @@ -762,6 +764,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { VkFormat dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat()); int scaleFactor = standardScaleFactor_; + if (scaleFactor > maxScaleFactor_) + scaleFactor = maxScaleFactor_; // Rachet down scale factor in low-memory mode. if (lowMemoryMode_) { diff --git a/GPU/Vulkan/TextureCacheVulkan.h b/GPU/Vulkan/TextureCacheVulkan.h index 254efcd40bac..7f8ae32d25ba 100644 --- a/GPU/Vulkan/TextureCacheVulkan.h +++ b/GPU/Vulkan/TextureCacheVulkan.h @@ -139,6 +139,7 @@ class TextureCacheVulkan : public TextureCacheCommon { Vulkan2D *vulkan2D_; std::string textureShader_; + int maxScaleFactor_ = 255; VkShaderModule uploadCS_ = VK_NULL_HANDLE; VkShaderModule copyCS_ = VK_NULL_HANDLE;