From a2e2d00fa8f9d0ce2e0ad9a645a13f9796c32edc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 Aug 2022 11:07:25 +0200 Subject: [PATCH] Implement smoothed depal for the "old" depal path as well. --- GPU/Common/DepalettizeCommon.h | 186 +++++++++++++++++++++++++ GPU/Common/DepalettizeShaderCommon.cpp | 69 +++++++-- GPU/Common/DepalettizeShaderCommon.h | 1 + GPU/Common/TextureCacheCommon.cpp | 18 ++- GPU/Common/TextureShaderCommon.cpp | 36 +++-- GPU/Common/TextureShaderCommon.h | 5 +- 6 files changed, 284 insertions(+), 31 deletions(-) create mode 100644 GPU/Common/DepalettizeCommon.h diff --git a/GPU/Common/DepalettizeCommon.h b/GPU/Common/DepalettizeCommon.h new file mode 100644 index 000000000000..8c9e66fbea12 --- /dev/null +++ b/GPU/Common/DepalettizeCommon.h @@ -0,0 +1,186 @@ +// Copyright (c) 2014- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/GPU/Shader.h" +#include "Common/GPU/thin3d.h" +#include "GPU/ge_constants.h" +#include "GPU/Common/Draw2D.h" +#include "GPU/Common/ShaderCommon.h" +#include "GPU/Common/DepalettizeShaderCommon.h" + +class DepalShader { +public: + Draw::ShaderModule *fragShader; + Draw::Pipeline *pipeline; + std::string code; +}; + +class DepalTexture { +public: + Draw::Texture *texture; + int lastFrame; + // How many entries are continuously growing (each value larger than the previous) from entry 0. + int rampLength; +}; + +// Caches both shaders and palette textures. +class DepalShaderCache { +public: + DepalShaderCache(Draw::DrawContext *draw); + ~DepalShaderCache(); + + // This also uploads the palette and binds the correct texture. + DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal); + DepalTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + + Draw::SamplerState *GetSampler(bool linearFilter); + + void Clear(); + void Decimate(); + std::vector DebugGetShaderIDs(DebugShaderType type); + std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType); + + void DeviceLost(); + void DeviceRestore(Draw::DrawContext *draw); + +private: + static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal) { + return (clutMode & 0xFFFFFF) | (pixelFormat << 24) | (texFormat << 28) | ((int)smoothedDepal << 27); + } + + static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) { + // Simplistic. + return clutHash ^ (uint32_t)clutFormat; + } + + Draw::DrawContext *draw_; + Draw::ShaderModule *vertexShader_ = nullptr; + Draw::SamplerState *nearestSampler_ = nullptr; + Draw::SamplerState *linearSampler_ = nullptr; + + std::map cache_; + std::map texCache_; +}; + +// TODO: Merge with DepalShaderCache? +class TextureShaderApplier { +public: + struct Pos { + float x; + float y; + }; + struct UV { + float u; + float v; + }; + + TextureShaderApplier(Draw::DrawContext *draw, DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH) + : draw_(draw), shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) { + static const Pos pos[4] = { + {-1, -1 }, + { 1, -1 }, + {-1, 1 }, + { 1, 1 }, + }; + memcpy(pos_, pos, sizeof(pos_)); + + static const UV uv[4] = { + { 0, 0 }, + { 1, 0 }, + { 0, 1 }, + { 1, 1 }, + }; + memcpy(uv_, uv, sizeof(uv_)); + } + + void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) { + // If min is not < max, then we don't have values (wasn't set during decode.) + if (bounds.minV < bounds.maxV) { + const float invWidth = 1.0f / bufferW_; + const float invHeight = 1.0f / bufferH_; + // Inverse of half = double. + const float invHalfWidth = invWidth * 2.0f; + const float invHalfHeight = invHeight * 2.0f; + + const int u1 = bounds.minU + uoff; + const int v1 = bounds.minV + voff; + const int u2 = bounds.maxU + uoff; + const int v2 = bounds.maxV + voff; + + const float left = u1 * invHalfWidth - 1.0f; + const float right = u2 * invHalfWidth - 1.0f; + const float top = v1 * invHalfHeight - 1.0f; + const float bottom = v2 * invHalfHeight - 1.0f; + // Points are: BL, BR, TR, TL. + pos_[0] = Pos{ left, bottom }; + pos_[1] = Pos{ right, bottom }; + pos_[2] = Pos{ left, top }; + pos_[3] = Pos{ right, top }; + + // And also the UVs, same order. + const float uvleft = u1 * invWidth; + const float uvright = u2 * invWidth; + const float uvtop = v1 * invHeight; + const float uvbottom = v2 * invHeight; + uv_[0] = UV{ uvleft, uvbottom }; + uv_[1] = UV{ uvright, uvbottom }; + uv_[2] = UV{ uvleft, uvtop }; + uv_[3] = UV{ uvright, uvtop }; + + // We need to reapply the texture next time since we cropped UV. + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + } + } + + void Use() { + draw_->BindPipeline(shader_->pipeline); + struct SimpleVertex { + float pos[2]; + float uv[2]; + }; + for (int i = 0; i < 4; i++) { + memcpy(&verts_[i].x, &pos_[i], sizeof(Pos)); + memcpy(&verts_[i].u, &uv_[i], sizeof(UV)); + } + } + + void Shade() { + Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f }; + // TODO: Half pixel offset for D3D9? + draw_->SetViewports(1, &vp); + draw_->SetScissorRect(0, 0, renderW_, renderH_); + draw_->DrawUP((const uint8_t *)verts_, 4); + } + +protected: + Draw::DrawContext *draw_; + DepalShader *shader_; + Pos pos_[4]; + UV uv_[4]; + Draw2DVertex verts_[4]; + float bufferW_; + float bufferH_; + int renderW_; + int renderH_; +}; diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 2b9ddca41348..6e34d0240242 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = { }; // Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11. -void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { const int shift = config.shift; const int mask = config.mask; @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con } // FP only, to suit GL(ES) 2.0 and DX9 -void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) { char lookupMethod[128] = "index.r"; const int shift = config.shift; @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } +void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) { + const char *sourceChannel = "error"; + float indexMultiplier = 32.0f; + + if (config.bufferFormat == GE_FORMAT_5551) { + _dbg_assert_(config.mask == 0x1F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; break; + case 10: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else if (config.bufferFormat == GE_FORMAT_565) { + _dbg_assert_(config.mask == 0x1F || config.mask == 0x3F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; indexMultiplier = 64.0f; break; + case 11: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else { + _dbg_assert_(false); + } + + writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier); + + float texturePixels = 256.f; + if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) { + texturePixels = 512.f; + } + + writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels); + writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); +} + void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) { ShaderWriter writer(buffer, lang, ShaderStage::Fragment); writer.DeclareSamplers(samplers); writer.HighPrecisionFloat(); writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_NONE); - switch (lang.shaderLanguage) { - case HLSL_D3D9: - case GLSL_1xx: - GenerateDepalShaderFloat(writer, config, lang); - break; - case GLSL_VULKAN: - case GLSL_3xx: - case HLSL_D3D11: - GenerateDepalShader300(writer, config, lang); - break; - default: - _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + if (config.smoothedDepal) { + // Handles a limited set of cases, but doesn't need any integer math so we don't + // need two variants. + GenerateDepalSmoothed(writer, config); + } else { + switch (lang.shaderLanguage) { + case HLSL_D3D9: + case GLSL_1xx: + GenerateDepalShaderFloat(writer, config); + break; + case GLSL_VULKAN: + case GLSL_3xx: + case HLSL_D3D11: + GenerateDepalShader300(writer, config); + break; + default: + _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + } } writer.EndFSMain("outColor", FSFLAG_NONE); } diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 91186f5d2038..322784c0f0d3 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -31,6 +31,7 @@ struct DepalConfig { GEPaletteFormat clutFormat; GETextureFormat textureFormat; GEBufferFormat bufferFormat; + bool smoothedDepal; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 93d120128679..0f3e82ff49d3 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1906,12 +1906,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer break; } + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + ClutTexture clutTexture{}; + bool smoothedDepal = false; + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { + clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength); + if (useShaderDepal) { - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. - ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); BindAsClutTexture(clutTexture.texture); framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); @@ -1927,7 +1932,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true, CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength)); + gstate_c.SetUseShaderDepal(true, smoothedDepal); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; @@ -1939,7 +1944,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); + textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal); gstate_c.SetUseShaderDepal(false, false); } @@ -1957,9 +1962,10 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); draw_->BindTexture(1, clutTexture.texture); - Draw::SamplerState *nearest = textureShaderCache_->GetSampler(); + Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false); + Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal); draw_->BindSamplerStates(0, 1, &nearest); - draw_->BindSamplerStates(1, 1, &nearest); + draw_->BindSamplerStates(1, 1, &clutSampler); textureShaderCache_->ApplyShader(textureShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index 1f684af5da7c..c30ccb0204a7 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -147,6 +147,10 @@ void TextureShaderCache::Clear() { nearestSampler_->Release(); nearestSampler_ = nullptr; } + if (linearSampler_) { + linearSampler_->Release(); + linearSampler_ = nullptr; + } } void TextureShaderCache::Decimate() { @@ -161,15 +165,28 @@ void TextureShaderCache::Decimate() { } } -Draw::SamplerState *TextureShaderCache::GetSampler() { - if (!nearestSampler_) { - Draw::SamplerStateDesc desc{}; - desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; - nearestSampler_ = draw_->CreateSamplerState(desc); +Draw::SamplerState *TextureShaderCache::GetSampler(bool linearFilter) { + if (linearFilter) { + if (!linearSampler_) { + Draw::SamplerStateDesc desc{}; + desc.magFilter = Draw::TextureFilter::LINEAR; + desc.minFilter = Draw::TextureFilter::LINEAR; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + linearSampler_ = draw_->CreateSamplerState(desc); + } + return linearSampler_; + } else { + if (!nearestSampler_) { + Draw::SamplerStateDesc desc{}; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + nearestSampler_ = draw_->CreateSamplerState(desc); + } + return nearestSampler_; } - return nearestSampler_; } TextureShader *TextureShaderCache::CreateShader(const char *fs) { @@ -220,7 +237,7 @@ TextureShader *TextureShaderCache::CreateShader(const char *fs) { return depal; } -TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { +TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat, bool smoothedDepal) { using namespace Draw; // Generate an ID for depal shaders. @@ -240,6 +257,7 @@ TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETex config.mask = gstate.getClutIndexMask(); config.bufferFormat = bufferFormat; config.textureFormat = textureFormat; + config.smoothedDepal = smoothedDepal; char *buffer = new char[4096]; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index f5ff0af8dcd8..583aa3d516f3 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -49,10 +49,10 @@ class TextureShaderCache { TextureShaderCache(Draw::DrawContext *draw); ~TextureShaderCache(); - TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); + TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal); ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); - Draw::SamplerState *GetSampler(); + Draw::SamplerState *GetSampler(bool linearFilter); void ApplyShader(TextureShader *shader, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff); @@ -70,6 +70,7 @@ class TextureShaderCache { Draw::DrawContext *draw_; Draw::ShaderModule *vertexShader_ = nullptr; Draw::SamplerState *nearestSampler_ = nullptr; + Draw::SamplerState *linearSampler_ = nullptr; std::map depalCache_; std::map texCache_;