Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vulkan: Add MMPX upscaling texture shader #13986

Merged
merged 2 commits into from
Jan 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions GPU/Common/PostShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ void LoadPostShaderInfo(const std::vector<std::string> &directories) {
info.section = section.name();
section.Get("Name", &info.name, section.name().c_str());
section.Get("Compute", &temp, "");
section.Get("MaxScale", &info.maxScale, 255);
info.computeShaderFile = path + "/" + temp;

appendTextureShader(info);
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/PostShader.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ struct TextureShaderInfo {
std::string name;

std::string computeShaderFile;
int maxScale;

bool operator == (const std::string &other) {
return name == other;
Expand Down
4 changes: 4 additions & 0 deletions GPU/Vulkan/TextureCacheVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ void TextureCacheVulkan::CompileScalingShader() {
if (copyCS_ != VK_NULL_HANDLE)
vulkan_->Delete().QueueDeleteShaderModule(copyCS_);
textureShader_.clear();
maxScaleFactor_ = 255;
} else if (uploadCS_ || copyCS_) {
// No need to recreate.
return;
Expand All @@ -417,6 +418,7 @@ void TextureCacheVulkan::CompileScalingShader() {
_dbg_assert_msg_(copyCS_ != VK_NULL_HANDLE, "failed to compile copy shader");

textureShader_ = g_Config.sTextureShaderName;
maxScaleFactor_ = shaderInfo->maxScale;
}

void TextureCacheVulkan::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
Expand Down Expand Up @@ -762,6 +764,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
VkFormat dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());

int scaleFactor = standardScaleFactor_;
if (scaleFactor > maxScaleFactor_)
scaleFactor = maxScaleFactor_;

// Ratchet down scale factor in low-memory mode.
if (lowMemoryMode_) {
Expand Down
1 change: 1 addition & 0 deletions GPU/Vulkan/TextureCacheVulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ class TextureCacheVulkan : public TextureCacheCommon {
Vulkan2D *vulkan2D_;

std::string textureShader_;
int maxScaleFactor_ = 255;
VkShaderModule uploadCS_ = VK_NULL_HANDLE;
VkShaderModule copyCS_ = VK_NULL_HANDLE;

Expand Down
5 changes: 5 additions & 0 deletions assets/shaders/defaultshaders.ini
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,8 @@ Type=Texture
Name=4xBRZ
Author=Hyllian
Compute=tex_4xbrz.csh
[TexMMPX]
Type=Texture
Name=MMPX
Author=Morgan McGuire and Mara Gagiu
Compute=tex_mmpx.csh
hrydgard marked this conversation as resolved.
Show resolved Hide resolved
117 changes: 117 additions & 0 deletions assets/shaders/tex_mmpx.csh
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/* MMPX.glc
Copyright 2020 Morgan McGuire & Mara Gagiu.
Provided under the Open Source MIT license https://opensource.org/licenses/MIT

See js-demo.html for the commented source code.
This is an optimized GLSL port of that version
by Morgan McGuire and Mara Gagiu.
*/

#define ABGR8 uint

// Fetches the packed source texel at (x, y). Both coordinates are clamped to
// [0, params.width - 1] x [0, params.height - 1], so lookups that fall outside
// the image return the nearest border texel instead.
ABGR8 src(int x, int y) {
    uvec2 texelPos = uvec2(
        clamp(x, 0, params.width - 1),
        clamp(y, 0, params.height - 1));
    return readColoru(texelPos);
}

// Brightness-style ranking value for a packed 8:8:8:8 colour.
// Adds the three low bytes (plus one, so the sum is never zero) and scales the
// result by (256 - top byte), where the top byte is treated as alpha.
uint luma(ABGR8 C) {
    uint alpha = C >> 24;
    uint c2 = (C >> 16) & 0xFFu;
    uint c1 = (C >> 8) & 0xFFu;
    uint c0 = C & 0xFFu;
    uint channelSum = c2 + c1 + c0 + 1u;
    return channelSum * (256u - alpha);
}

// True when both A0 and A1 are bit-identical to B.
bool all_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
    return B == A0 && B == A1;
}

// True when A0, A1 and A2 are all bit-identical to B.
bool all_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
    return B == A0 && B == A1 && B == A2;
}

// True when A0, A1, A2 and A3 are all bit-identical to B.
bool all_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
    return B == A0 && B == A1 && B == A2 && B == A3;
}

// True when at least one of A0, A1, A2 is bit-identical to B.
bool any_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
    if (B == A0) {
        return true;
    }
    if (B == A1) {
        return true;
    }
    return B == A2;
}

// True when neither A0 nor A1 is bit-identical to B.
bool none_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
    return !(B == A0 || B == A1);
}

// True when none of A0, A1, A2, A3 is bit-identical to B.
bool none_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
    if (B == A0 || B == A1) {
        return false;
    }
    if (B == A2 || B == A3) {
        return false;
    }
    return true;
}

// Computes one packed output texel of the MMPX upscale.
//   origxy - coordinate of the source texel (E below) being magnified.
//   xy     - coordinate of the output texel; only the low bit of each
//            component is used, to choose among the four results J/K/L/M.
// Returns the chosen packed colour. All four candidates are computed on every
// call even though only one is returned (see the TODO near the end).
uint applyScalingu(uvec2 origxy, uvec2 xy) {
int srcX = int(origxy.x);
int srcY = int(origxy.y);

// 3x3 neighbourhood centred on E, read through src():
//   A B C
//   D E F
//   G H I
ABGR8 A = src(srcX - 1, srcY - 1), B = src(srcX, srcY - 1), C = src(srcX + 1, srcY - 1);
ABGR8 D = src(srcX - 1, srcY + 0), E = src(srcX, srcY + 0), F = src(srcX + 1, srcY + 0);
ABGR8 G = src(srcX - 1, srcY + 1), H = src(srcX, srcY + 1), I = src(srcX + 1, srcY + 1);

// The four output candidates all start as the centre colour; the rules below
// overwrite them. Later rules can override earlier ones, so order matters.
ABGR8 J = E, K = E, L = E, M = E;

// Skip every rule when all eight neighbours are identical to E.
if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) {
// Extra samples two texels away: P above, S below, Q left, R right.
ABGR8 P = src(srcX, srcY - 2), S = src(srcX, srcY + 2);
ABGR8 Q = src(srcX - 2, srcY), R = src(srcX + 2, srcY);
// luma() values of the centre and its four edge neighbours (plain uints,
// declared through the ABGR8 alias).
ABGR8 Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H);

// 1:1 slope rules
if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D;
if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B;
if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H;
if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F;

// Intersection rules
// The first four additionally sample three texels away along one axis.
if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(srcX + 3, srcY))) K = M = F;
if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(srcX - 3, srcY))) J = L = D;
if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(srcX, srcY + 3))) L = M = H;
if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(srcX, srcY - 3))) J = K = B;
if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B;
if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H;
if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F;
if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D;

// 2:1 slope rules
// These copy one already-computed candidate into another, so they depend on
// the assignments made by the rules above.
if (H != B) {
if (H != A && H != E && H != C) {
if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M;
if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L;
}

if (B != I && B != G && B != E) {
if (all_eq3(B, A, F, R) && none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K;
if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J;
}
} // H !== B

if (F != D) {
if (D != I && D != E && D != C) {
if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L;
if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J;
}

if (F != E && F != A && F != G) {
if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M;
if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K;
}
} // F !== D
} // not constant

// TODO: Write four pixels at once. For now, 1/4x speed.
// Select by output-coordinate parity:
//   even y: even x -> J, odd x -> K
//   odd y:  even x -> L, odd x -> M
if ((xy.y & 1u) == 0u) {
if ((xy.x & 1u) == 0u) {
return J;
}
return K;
}
if ((xy.x & 1u) == 0u) {
return L;
}
return M;
}

// Floating-point variant of applyScalingu(): runs the integer path and then
// expands the packed 8:8:8:8 result into a normalized vec4 with
// unpackUnorm4x8().
vec4 applyScalingf(uvec2 origxy, uvec2 xy) {
    uint texel = applyScalingu(origxy, xy);
    return unpackUnorm4x8(texel);
}