From 870b8f84ec2514eacf73ff67b34f28ae17343598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 20 Jul 2022 00:05:07 +0200 Subject: [PATCH] Special case depal lookups for Test Drive's strange usage. This implements the hack I suggested in #13355, where instead of first reducing the color to RGB565 as the real game does, we just take each channel at full precision and do the lookup according to the mask, linearly filtering the palette. This makes the game look a lot nicer and is also a small optimization, but the hack is very specific so kinda ugly in a way. --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + GPU/Common/FragmentShaderGenerator.cpp | 25 +++++++++++++++++++++++++ GPU/Vulkan/DrawEngineVulkan.cpp | 4 ++-- GPU/Vulkan/DrawEngineVulkan.h | 2 +- assets/compat.ini | 7 +++++++ 6 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index f17c1fd6a1f4..7a5e12d34915 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -98,6 +98,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); + CheckSetting(iniFile, gameID, "SmoothedDepal", &flags_.SmoothedDepal); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 29345aee42ca..5d52e3aec928 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -88,6 +88,7 @@ struct CompatFlags { bool AllowLargeFBTextureOffsets; bool AtracLoopHack; bool DeswizzleDepth; + bool SmoothedDepal; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index f956217b3884..e7fd8b5fedb6 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -23,8 +23,10 @@ #include "Common/GPU/OpenGL/GLFeatures.h" #include "Common/GPU/ShaderWriter.h" #include "Common/GPU/thin3d.h" +#include "Core/Compatibility.h" #include "Core/Reporting.h" #include "Core/Config.h" +#include "Core/System.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderUniforms.h" @@ -88,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. + bool smoothedDepal = PSP_CoreParameter().compat.flags().SmoothedDepal; bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; @@ -590,6 +593,28 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } } + } else if (shaderDepal && smoothedDepal) { + // Specific mode for Test Drive. Fixes the banding. + if (doTextureProjection) { + // We don't use textureProj because we need better control and it's probably not much of a savings anyway. + // However it is good for precision on older hardware like PowerVR. + WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord); + } else { + WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); + } + WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); + WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n"); + WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n"); + WRITE(p, " float index0 = t.r;\n"); + WRITE(p, " float mul = 32.0 / 256.0;\n"); + WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh. + WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n"); + WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n"); + WRITE(p, " } else {\n"); + WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n"); + WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n"); + WRITE(p, " }\n"); + WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture); } else { if (doTextureProjection) { // We don't use textureProj because we need better control and it's probably not much of a savings anyway. diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 256c1adbaca1..27d1fda33f95 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -185,8 +185,8 @@ void DrawEngineVulkan::InitDeviceObjects() { samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; samp.flags = 0; - samp.magFilter = VK_FILTER_NEAREST; - samp.minFilter = VK_FILTER_NEAREST; + samp.magFilter = VK_FILTER_LINEAR; + samp.minFilter = VK_FILTER_LINEAR; res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_); _dbg_assert_(VK_SUCCESS == res); res = vkCreateSampler(device, &samp, nullptr, &nullSampler_); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 48b8369fd7ef..e4a75f2a35ea 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -218,7 +218,7 @@ class DrawEngineVulkan : public DrawEngineCommon { // Secondary texture for shader blending VkImageView boundSecondary_ = VK_NULL_HANDLE; VkImageView boundDepal_ = VK_NULL_HANDLE; - VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch. + VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch (except in SmoothedDepal mode for Test Drive). PrehashMap vai_; VulkanPushBuffer *vertexCache_; diff --git a/assets/compat.ini b/assets/compat.ini index 974a6216defd..55ddf688fc57 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1276,3 +1276,10 @@ UCKS45048 = true UCJS18030 = true UCJS18047 = true NPJG00015 = true + +[SmoothedDepal] +# Test Drive Unlimited smoothed CLUT lookups. See comments in #13355 +ULET00386 = true +ULES00637 = true +ULKS46126 = true +ULUS10249 = true