From 19b4febbbf98e4b0a7568c2f697b77e452203e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 2 Nov 2020 16:12:45 +0100 Subject: [PATCH 1/9] More reinterpret shader gen and test work. More work on reinterpret Buildsystem fixes --- CMakeLists.txt | 2 + Common/Common.vcxproj | 1 + Common/Common.vcxproj.filters | 5 +- Common/Data/Collections/Slice.h | 48 ++++++++++ Common/GPU/ShaderWriter.cpp | 129 +++++++++++++++++++++++++- Common/GPU/ShaderWriter.h | 53 ++++++++++- Core/Opcode.h | 2 +- GPU/Common/ColorReinterpret.cpp | 49 ---------- GPU/Common/ColorReinterpret.h | 6 -- GPU/Common/ReinterpretFramebuffer.cpp | 64 +++++++++++++ GPU/Common/ReinterpretFramebuffer.h | 8 ++ GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 8 +- UWP/GPU_UWP/GPU_UWP.vcxproj | 2 + UWP/GPU_UWP/GPU_UWP.vcxproj.filters | 2 + android/jni/Android.mk | 1 + libretro/Makefile.common | 1 + unittest/TestShaderGenerators.cpp | 61 ++++++++++++ 18 files changed, 380 insertions(+), 64 deletions(-) create mode 100644 Common/Data/Collections/Slice.h delete mode 100644 GPU/Common/ColorReinterpret.cpp delete mode 100644 GPU/Common/ColorReinterpret.h create mode 100644 GPU/Common/ReinterpretFramebuffer.cpp create mode 100644 GPU/Common/ReinterpretFramebuffer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ce37815473c3..14972302475e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1354,6 +1354,8 @@ set(GPU_SOURCES GPU/Common/DrawEngineCommon.h GPU/Common/PresentationCommon.cpp GPU/Common/PresentationCommon.h + GPU/Common/ReinterpretFramebuffer.cpp + GPU/Common/ReinterpretFramebuffer.h GPU/Common/ShaderId.cpp GPU/Common/ShaderId.h GPU/Common/ShaderUniforms.cpp diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index f3ebb5328d52..fe60080c888a 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -386,6 +386,7 @@ + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 38d3896534cb..4f5d86749206 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -381,6 +381,9 @@ GPU + + Data\Collections + @@ -850,4 +853,4 @@ Math\fast - \ No newline at end of file + diff --git a/Common/Data/Collections/Slice.h b/Common/Data/Collections/Slice.h new file mode 100644 index 000000000000..a58b90248159 --- /dev/null +++ b/Common/Data/Collections/Slice.h @@ -0,0 +1,48 @@ +#pragma once + +#include + +// Like a const begin/end pair, just more convenient to use (and can only be used for linear array data). +// Inspired by Rust's slices and Google's StringPiece. +template +struct Slice { + // View some memory as a slice. + Slice(const T *data, size_t size) : data_(data), size_(size) {} + + // Intentionally non-explicit. + // View a const array as a slice. + template + Slice(const T(&data)[N]) : data_(data), size_(N) {} + + // Intentionally non-explicit. + // View a const array as a slice. + Slice(const std::vector &data) : data_(data.data()), size_(data.size()) {} + + const T &operator[](size_t index) const { + return data_[index]; + } + + size_t size() const { + return size_; + } + + // "Iterators" + const T *begin() const { + return data_; + } + const T *end() const { + return data_ + size_; + } + + static Slice empty() { + return Slice(nullptr, 0); + } + + bool is_empty() const { + return size_ == 0; + } + +private: + const T *data_; + size_t size_; +}; diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index cebb0d017af5..ee29dbb83ffa 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -64,11 +64,12 @@ const char *hlsl_preamble_vs = "\n"; // Unsafe. But doesn't matter, we'll use big buffers for shader gen. -void ShaderWriter::F(const char *format, ...) { +ShaderWriter & ShaderWriter::F(const char *format, ...) { va_list args; va_start(args, format); p_ += vsprintf(p_, format, args); va_end(args); + return *this; } void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions) { @@ -110,6 +111,9 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions C("#define DISCARD discard\n"); if (lang_.gles) { C("precision lowp float;\n"); + if (lang_.glslES30) { + C("precision highp int;\n"); + } } break; case ShaderStage::Vertex: @@ -128,3 +132,126 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions break; } } + +void ShaderWriter::BeginVSMain(Slice inputs, Slice uniforms, Slice varyings) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + case HLSL_D3D9: + break; + case GLSL_VULKAN: + default: + C("void main() {\n"); + break; + } +} + +void ShaderWriter::BeginFSMain(Slice uniforms, Slice varyings) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + if (!uniforms.is_empty()) { + for (auto &uniform : uniforms) { + //F(" %s %s : %s;\n", uniform.type, uniform.name, uniform.index); + } + } + // Let's do the varyings as parameters to main, no struct. + C("vec4 main("); + for (auto &varying : varyings) { + F(" %s %s : %s, ", varying.type, varying.name, varying.semantic); + } + // Erase the last comma + Rewind(2); + + F(") : SV_Target0 {\n"); + break; + case HLSL_D3D9: + for (auto &uniform : uniforms) { + F(" %s %s : %s;\n", uniform.type, uniform.name, uniform.index); + } + // Let's do the varyings as parameters to main, no struct. + C("vec4 main("); + for (auto &varying : varyings) { + F(" %s %s : %s, ", varying.type, varying.name, varying.semantic); + } + // Erase the last comma + Rewind(2); + + F(") : COLOR {\n"); + break; + case GLSL_VULKAN: + for (auto &varying : varyings) { + F("layout(location = %d) in %s %s; // %s\n", varying.index, varying.type, varying.name, varying.semantic); + } + C("layout (location = 0, index = 0) out vec4 fragColor0;\n"); + C("\nvoid main() {\n"); + break; + default: + for (auto &varying : varyings) { + F("in %s %s; // %s\n", varying.type, varying.name, varying.semantic); + } + if (!strcmp(lang_.fragColor0, "fragColor0")) { + C("out vec4 fragColor0;\n"); + } + C("\nvoid main() {\n"); + break; + } +} + +void ShaderWriter::EndVSMain() { + C("}\n"); +} + +void ShaderWriter::EndFSMain(const char *vec4_color_variable) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + case HLSL_D3D9: + F(" return %s;\n", vec4_color_variable); + break; + case GLSL_VULKAN: + default: // OpenGL + F(" %s = %s;\n", lang_.fragColor0, vec4_color_variable); + break; + } + C("}\n"); +} + +void ShaderWriter::DeclareTexture2D(const char *name, int binding) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + F("Texture2D %s : register(t%d);\n", name, binding); + break; + case HLSL_D3D9: + break; + case GLSL_VULKAN: + F("layout(set = 0, binding = %d) uniform sampler2D %s;\n", binding, name); + break; + default: + F("uniform sampler2D %s;\n", name); + break; + } +} + +void ShaderWriter::DeclareSampler2D(const char *name, int binding) { + // We only use separate samplers in HLSL D3D11, where we have no choice. + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + F("SamplerState %s : register(s%d);\n", name, binding); + break; + } +} + +ShaderWriter &ShaderWriter::SampleTexture2D(const char *texName, const char *samplerName, const char *uv) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + F("%s.Sample(%s, %s)", texName, samplerName, uv); + break; + case HLSL_D3D9: + F("tex2D(%s, %s)", texName, uv); + break; + default: + // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. + F("%s(%s, %s)", lang_.texture, texName, uv); + break; + } + return *this; +} + diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index d870f72fe46c..2e51dcc73035 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -4,11 +4,35 @@ #include "Common/Log.h" #include "Common/GPU/Shader.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUCommon.h" +#include "Common/Data/Collections/Slice.h" // Helps generate a shader compatible with all backends. +// +// Can use the uniform buffer support in thin3d. +// // Using #defines and magic in this class, we partially define our own shader language that basically looks // like GLSL, but has a few little oddities like splat3. +struct InputDef { + const char *type; + const char *name; +}; + +struct UniformDef { + const char *type; + const char *name; + int index; +}; + +struct VaryingDef { + const char *type; + const char *name; + const char *semantic; + int index; +}; + class ShaderWriter { public: ShaderWriter(char *buffer, const ShaderLanguageDesc &lang, ShaderStage stage, const char **gl_extensions, size_t num_gl_extensions) : p_(buffer), lang_(lang), stage_(stage) { @@ -21,23 +45,42 @@ class ShaderWriter { // Assumes the input is zero-terminated. // C : Copies a buffer directly to the stream. template - void C(const char(&text)[T]) { + ShaderWriter &C(const char(&text)[T]) { memcpy(p_, text, T); p_ += T - 1; + return *this; } // W: Writes a zero-terminated string to the stream. - void W(const char *text) { + ShaderWriter &W(const char *text) { size_t len = strlen(text); memcpy(p_, text, len + 1); p_ += len; + return *this; } // F: Formats into the buffer. - void F(const char *format, ...); + ShaderWriter &F(const char *format, ...); - // void BeginMain(); - // void EndMain(); + // Several of the shader languages ignore samplers, beware of that. + void DeclareSampler2D(const char *name, int binding); + void DeclareTexture2D(const char *name, int binding); + + ShaderWriter &SampleTexture2D(const char *texName, const char *samplerName, const char *uv); + + // Simple shaders with no special tricks. + void BeginVSMain(Slice inputs, Slice uniforms, Slice varyings); + void BeginFSMain(Slice uniforms, Slice varyings); + + // For simple shaders that output a single color, we can deal with this generically. + void EndVSMain(); + void EndFSMain(const char *vec4_color_variable); + + + void Rewind(size_t offset) { + p_ -= offset; + } + // Can probably remove this char *GetPos() { return p_; } diff --git a/Core/Opcode.h b/Core/Opcode.h index 6eb01c41fcaf..456d77dbf679 100644 --- a/Core/Opcode.h +++ b/Core/Opcode.h @@ -17,7 +17,7 @@ #pragma once -#include "CommonTypes.h" +#include "Common/CommonTypes.h" // Broken out of MemMap.h to avoid a bad include dependency. diff --git a/GPU/Common/ColorReinterpret.cpp b/GPU/Common/ColorReinterpret.cpp deleted file mode 100644 index 3699d2f8aab5..000000000000 --- a/GPU/Common/ColorReinterpret.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include - -#include "GPU/Common/ColorReinterpret.h" -#include "GPU/Common/ShaderWriter.h" - -// TODO: We could have an option to preserve any extra color precision. But gonna start without it. -// Requires full size integer math. -bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) { - if (!lang.bitwiseOps) { - return false; - } - ShaderWriter writer(buffer, lang, ShaderStage::Fragment); - - switch (from) { - case GE_FORMAT_4444: - writer.W(" uint color = uint(in.r * 15.99) | (uint(in.g * 15.99) << 4) | (uint(in.b * 15.99) << 8) | (uint(in.a * 15.99) << 12);\n"); - break; - case GE_FORMAT_5551: - writer.W(" uint color = uint(in.r * 31.99) | (uint(in.g * 31.99) << 5) | (uint(in.b * 31.99) << 10);\n"); - writer.W(" if (in.a > 128.0) color |= 0x8000;\n"); - break; - case GE_FORMAT_565: - writer.W(" uint color = uint(in.r * 31.99) | (uint(in.g * 63.99) << 5) | (uint(in.b * 31.99) << 11);\n"); - break; - default: _assert_(false); - } - - switch (to) { - case GE_FORMAT_4444: - writer.W(" vec4 output = vec4(float(color & 0xF), float((color >> 4) & 0xF), float((color >> 8) & 0xF), float((color >> 12) & 0xF));\n"); - writer.W(" output *= 1.0 / 15.0;\n"); - break; - case GE_FORMAT_5551: - writer.W(" vec4 output = vec4(float(color & 0x1F), float((color >> 5) & 0x1F), float((color >> 10) & 0x1F), 0.0);\n"); - writer.W(" output.rgb *= 1.0 / 31.0;\n"); - writer.W(" output.a = float(color >> 15);\n"); - break; - case GE_FORMAT_565: - writer.W(" vec4 output = vec4(float(color & 0x1F), float((color >> 5) & 0x3F), float((color >> 11) & 0x1F), 1.0);\n"); - writer.W(" output.rb *= 1.0 / 31.0;\n"); - writer.W(" output.g *= 1.0 / 63.0;\n"); - break; - default: _assert_(false); - } - - writer.W("}"); - - return true; -} diff --git a/GPU/Common/ColorReinterpret.h b/GPU/Common/ColorReinterpret.h deleted file mode 100644 index c1ff33226a6f..000000000000 --- a/GPU/Common/ColorReinterpret.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include "Common/Log.h" -#include "GPU/ge_constants.h" -#include "GPU/GPUCommon.h" -#include "GPU/Common/ShaderWriter.h" diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp new file mode 100644 index 000000000000..cf039c0aa6c2 --- /dev/null +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -0,0 +1,64 @@ +#include + +#include "Common/GPU/ShaderWriter.h" +#include "GPU/Common/ReinterpretFramebuffer.h" + +// TODO: We could have an option to preserve any extra color precision. But gonna start without it. +// Requires full size integer math. +bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) { + if (!lang.bitwiseOps) { + return false; + } + + ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0); + + writer.DeclareSampler2D("samp", 0); + writer.DeclareTexture2D("tex", 0); + + static const VaryingDef varyings[1] = { + { "vec4", "v_texcoord", "TEXCOORD0" }, + }; + + writer.BeginFSMain(Slice::empty(), varyings); + + writer.C(" vec4 val = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n"); + + switch (from) { + case GE_FORMAT_4444: + writer.C(" uint color = uint(val.r * 15.99) | (uint(val.g * 15.99) << 4) | (uint(val.b * 15.99) << 8) | (uint(val.a * 15.99) << 12);\n"); + break; + case GE_FORMAT_5551: + writer.C(" uint color = uint(val.r * 31.99) | (uint(val.g * 31.99) << 5) | (uint(val.b * 31.99) << 10);\n"); + writer.C(" if (val.a > 128.0) color |= 0x8000U;\n"); + break; + case GE_FORMAT_565: + writer.C(" uint color = uint(val.r * 31.99) | (uint(val.g * 63.99) << 5) | (uint(val.b * 31.99) << 11);\n"); + break; + default: + _assert_(false); + break; + } + + switch (to) { + case GE_FORMAT_4444: + writer.C(" vec4 outColor = vec4(float(color & 0xFU), float((color >> 4) & 0xFU), float((color >> 8) & 0xFU), float((color >> 12) & 0xFU));\n"); + writer.C(" outColor *= 1.0 / 15.0;\n"); + break; + case GE_FORMAT_5551: + writer.C(" vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5) & 0x1FU), float((color >> 10) & 0x1FU), 0.0);\n"); + writer.C(" outColor.rgb *= 1.0 / 31.0;\n"); + writer.C(" outColor.a = float(color >> 15);\n"); + break; + case GE_FORMAT_565: + writer.C(" vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5) & 0x3FU), float((color >> 11) & 0x1FU), 1.0);\n"); + writer.C(" outColor.rb *= 1.0 / 31.0;\n"); + writer.C(" outColor.g *= 1.0 / 63.0;\n"); + break; + default: + _assert_(false); + break; + } + + writer.EndFSMain("outColor"); + return true; +} diff --git a/GPU/Common/ReinterpretFramebuffer.h b/GPU/Common/ReinterpretFramebuffer.h new file mode 100644 index 000000000000..f10fb9e3a33b --- /dev/null +++ b/GPU/Common/ReinterpretFramebuffer.h @@ -0,0 +1,8 @@ +#pragma once + +#include "Common/Log.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUCommon.h" +#include "Common/GPU/ShaderWriter.h" + +bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang); diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 675602d6d547..4f2550f63587 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -338,6 +338,7 @@ + @@ -475,6 +476,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 509e962588d3..2c800c5e5cab 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -270,6 +270,9 @@ Common + + Common + @@ -539,5 +542,8 @@ Common + + Common + - \ No newline at end of file + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj b/UWP/GPU_UWP/GPU_UWP.vcxproj index d5b5d3167eaf..fcf535af3d0d 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj @@ -388,6 +388,7 @@ + @@ -445,6 +446,7 @@ + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters index 12f33ec3afb0..58617e8b912b 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters @@ -57,6 +57,7 @@ + @@ -114,5 +115,6 @@ + \ No newline at end of file diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 845f3ca441ee..f0dde080eafc 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -316,6 +316,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/Common/ShaderId.cpp.arm \ $(SRC)/GPU/Common/GPUStateUtils.cpp.arm \ $(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \ + $(SRC)/GPU/Common/ReinterpretFramebuffer.cpp \ $(SRC)/GPU/Common/VertexDecoderCommon.cpp.arm \ $(SRC)/GPU/Common/TextureCacheCommon.cpp.arm \ $(SRC)/GPU/Common/TextureScalerCommon.cpp.arm \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 15620ed2b623..72c79b7129f4 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -234,6 +234,7 @@ SOURCES_CXX += \ $(GPUCOMMONDIR)/SplineCommon.cpp \ $(GPUCOMMONDIR)/FramebufferManagerCommon.cpp \ $(GPUCOMMONDIR)/PresentationCommon.cpp \ + $(GPUCOMMONDIR)/ReinterpretFramebuffer.cpp \ $(GPUCOMMONDIR)/ShaderId.cpp \ $(GPUCOMMONDIR)/ShaderCommon.cpp \ $(GPUCOMMONDIR)/ShaderUniforms.cpp \ diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 2f67729573e4..35dcde11043e 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -11,6 +11,7 @@ #include "GPU/Common/FragmentShaderGenerator.h" #include "GPU/Common/VertexShaderGenerator.h" +#include "GPU/Common/ReinterpretFramebuffer.h" #include "GPU/D3D11/D3D11Util.h" #include "GPU/D3D11/D3D11Loader.h" @@ -141,12 +142,72 @@ void PrintDiff(const char *a, const char *b) { } } +const char *ShaderLanguageToString(ShaderLanguage lang) { + switch (lang) { + case HLSL_D3D11: return "HLSL_D3D11"; + case HLSL_D3D9: return "HLSL_D3D9"; + case GLSL_VULKAN: return "GLSL_VULKAN"; + case GLSL_1xx: return "GLSL_1xx"; + case GLSL_3xx: return "GLSL_3xx"; + default: return "N/A"; + } +} + +bool TestReinterpretShaders() { + ShaderLanguage languages[] = { + ShaderLanguage::HLSL_D3D11, + ShaderLanguage::GLSL_VULKAN, + ShaderLanguage::GLSL_3xx, + }; + GEBufferFormat fmts[3] = { + GE_FORMAT_565, + GE_FORMAT_5551, + GE_FORMAT_4444, + }; + char *buffer = new char[65536]; + + // Generate all despite failures - it's only 6. + bool failed = false; + + for (int k = 0; k < ARRAY_SIZE(languages); k++) { + printf("=== %s ===\n\n", ShaderLanguageToString(languages[k])); + + ShaderLanguageDesc desc(languages[k]); + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + if (i == j) + continue; // useless shader! + if (!GenerateReinterpretFragmentShader(buffer, fmts[i], fmts[j], desc)) { + printf("Failed!\n%s\n", buffer); + failed = true; + } else { + std::string errorMessage; + if (!TestCompileShader(buffer, languages[k], false, &errorMessage)) { + printf("Error compiling fragment shader %d:\n\n%s\n\n%s\n", (int)j, LineNumberString(buffer).c_str(), errorMessage.c_str()); + failed = true; + return false; + } else { + printf("===\n%s\n===\n", buffer); + } + } + } + } + + } + return !failed; +} + bool TestShaderGenerators() { LoadD3D11(); init_glslang(); LoadD3DCompilerDynamic(); + if (!TestReinterpretShaders()) { + return false; + } + ShaderLanguage languages[] = { ShaderLanguage::HLSL_D3D9, ShaderLanguage::HLSL_D3D11, From f2e315b9a61bf7176948099f6929a5b63b378175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 3 Nov 2020 15:44:57 +0100 Subject: [PATCH 2/9] More shadergen work Buildfix --- Common/GPU/Shader.cpp | 7 +-- Common/GPU/Shader.h | 28 +++++++++ Common/GPU/ShaderWriter.cpp | 53 +++++++++++++++-- Common/GPU/ShaderWriter.h | 4 +- Common/GPU/thin3d.cpp | 4 +- Common/GPU/thin3d.h | 23 +------- GPU/Common/FramebufferManagerCommon.cpp | 54 +++++++++++++++++ GPU/Common/FramebufferManagerCommon.h | 10 +++- GPU/Common/PresentationCommon.cpp | 16 ++--- GPU/Common/PresentationCommon.h | 2 +- GPU/Common/ReinterpretFramebuffer.cpp | 26 +++++++-- GPU/Common/ReinterpretFramebuffer.h | 4 ++ GPU/D3D11/FramebufferManagerD3D11.cpp | 37 ------------ GPU/D3D11/FramebufferManagerD3D11.h | 1 - GPU/Directx9/FramebufferManagerDX9.cpp | 77 ------------------------- GPU/Directx9/FramebufferManagerDX9.h | 1 - GPU/GLES/FramebufferManagerGLES.cpp | 21 ------- GPU/GLES/FramebufferManagerGLES.h | 2 - GPU/Vulkan/FramebufferManagerVulkan.cpp | 26 --------- GPU/Vulkan/FramebufferManagerVulkan.h | 1 - GPU/ge_constants.h | 3 + unittest/TestShaderGenerators.cpp | 17 ++++++ 22 files changed, 203 insertions(+), 214 deletions(-) diff --git a/Common/GPU/Shader.cpp b/Common/GPU/Shader.cpp index f4bfe96b69b4..b199f0a22722 100644 --- a/Common/GPU/Shader.cpp +++ b/Common/GPU/Shader.cpp @@ -31,13 +31,12 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { fragColor0 = "fragColor0"; fragColor1 = "fragColor1"; texture = "texture"; - texelFetch = nullptr; - bitwiseOps = false; + texelFetch = "texelFetch"; + bitwiseOps = true; lastFragData = nullptr; - gles = false; + gles = true; forceMatrix4x4 = true; glslES30 = true; - bitwiseOps = true; texelFetch = "texelFetch"; break; case GLSL_VULKAN: diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h index d574cd47382e..69fa5c9d6513 100644 --- a/Common/GPU/Shader.h +++ b/Common/GPU/Shader.h @@ -1,5 +1,9 @@ #pragma once +#include +#include +#include // for size_t + // GLSL_1xx and GLSL_3xx each cover a lot of sub variants. All the little quirks // that differ are covered in ShaderLanguageDesc. // Defined as a bitmask so stuff like GetSupportedShaderLanguages can return combinations. @@ -47,6 +51,30 @@ struct ShaderLanguageDesc { bool coefsFromBuffers = false; }; +enum class UniformType : int8_t { + FLOAT1, + FLOAT2, + FLOAT3, + FLOAT4, + MATRIX4X4, +}; + +// Describe uniforms intricately enough that we can support them on all backends. +// This will generate a uniform struct on the newer backends and individual uniforms on the older ones. +struct UniformDesc { + const char *name; // For GL + int16_t vertexReg; // For D3D + int16_t fragmentReg; // For D3D + UniformType type; + int16_t offset; + // TODO: Support array elements etc. +}; + +struct UniformBufferDesc { + size_t uniformBufferSize; + std::vector uniforms; +}; + // For passing error messages from shader compilation (and other critical issues) back to the host. // This can run on any thread - be aware! // TODO: See if we can find a less generic name for this. diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index ee29dbb83ffa..866b879e0d48 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -120,6 +120,7 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions if (lang_.gles) { C("precision highp float;\n"); } + C("#define gl_VertexIndex gl_VertexID\n"); break; } if (!lang_.gles) { @@ -134,18 +135,47 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions } void ShaderWriter::BeginVSMain(Slice inputs, Slice uniforms, Slice varyings) { + _assert_(this->stage_ == ShaderStage::Vertex); switch (lang_.shaderLanguage) { case HLSL_D3D11: case HLSL_D3D9: + { + C("struct VS_OUTPUT {\n"); + C(" vec4 pos : POSITION;\n"); + for (auto &varying : varyings) { + F(" %s %s : %s;\n", varying.type, varying.name, varying.semantic); + } + C("};\n"); + + C("VS_OUTPUT main( "); // 2 spaces for the D3D9 rewind + if (lang_.shaderLanguage == HLSL_D3D11) { + C("uint gl_VertexIndex : SV_VertexID, "); + } + Rewind(2); // Get rid of the last comma. + C(") {\n"); + C(" vec4 gl_Position;\n"); + for (auto &varying : varyings) { + F(" %s %s;\n", varying.type, varying.name); + } break; + } case GLSL_VULKAN: - default: + for (auto &varying : varyings) { + F("layout(location = %d) out %s %s; // %s\n", varying.index, varying.type, varying.name, varying.semantic); + } + C("void main() {\n"); + break; + default: // OpenGL + for (auto &varying : varyings) { + F("%s %s %s; // %s (%d)\n", lang_.varying_vs, varying.type, varying.name, varying.semantic, varying.index); + } C("void main() {\n"); break; } } void ShaderWriter::BeginFSMain(Slice uniforms, Slice varyings) { + _assert_(this->stage_ == ShaderStage::Fragment); switch (lang_.shaderLanguage) { case HLSL_D3D11: if (!uniforms.is_empty()) { @@ -186,7 +216,7 @@ void ShaderWriter::BeginFSMain(Slice uniforms, Slice var break; default: for (auto &varying : varyings) { - F("in %s %s; // %s\n", varying.type, varying.name, varying.semantic); + F("%s %s %s; // %s\n", lang_.varying_fs, varying.type, varying.name, varying.semantic); } if (!strcmp(lang_.fragColor0, "fragColor0")) { C("out vec4 fragColor0;\n"); @@ -196,11 +226,27 @@ void ShaderWriter::BeginFSMain(Slice uniforms, Slice var } } -void ShaderWriter::EndVSMain() { +void ShaderWriter::EndVSMain(Slice varyings) { + _assert_(this->stage_ == ShaderStage::Vertex); + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + case HLSL_D3D9: + C(" VS_OUTPUT vs_out;\n"); + C(" vs_out.pos = gl_Position;\n"); + for (auto &varying : varyings) { + F(" vs_out.%s = %s;\n", varying.name, varying.name); + } + C(" return vs_out;\n"); + break; + case GLSL_VULKAN: + default: // OpenGL + break; + } C("}\n"); } void ShaderWriter::EndFSMain(const char *vec4_color_variable) { + _assert_(this->stage_ == ShaderStage::Fragment); switch (lang_.shaderLanguage) { case HLSL_D3D11: case HLSL_D3D9: @@ -254,4 +300,3 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *texName, const char *sam } return *this; } - diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index 2e51dcc73035..cbb2b34aea4e 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -8,6 +8,8 @@ #include "GPU/GPUCommon.h" #include "Common/Data/Collections/Slice.h" +#include "Common/GPU/thin3d.h" + // Helps generate a shader compatible with all backends. // // Can use the uniform buffer support in thin3d. @@ -72,7 +74,7 @@ class ShaderWriter { void BeginFSMain(Slice uniforms, Slice varyings); // For simple shaders that output a single color, we can deal with this generically. - void EndVSMain(); + void EndVSMain(Slice varyings); void EndFSMain(const char *vec4_color_variable); diff --git a/Common/GPU/thin3d.cpp b/Common/GPU/thin3d.cpp index 3b320fd8424d..ce09c0356c2f 100644 --- a/Common/GPU/thin3d.cpp +++ b/Common/GPU/thin3d.cpp @@ -396,7 +396,7 @@ DrawContext::~DrawContext() { DestroyPresets(); } -// TODO: SSE/NEON +// TODO: Use the functions we have in Common/ColorConv.cpp. // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) void ConvertFromRGBA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. @@ -455,7 +455,7 @@ void ConvertFromRGBA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, u } } -// TODO: SSE/NEON +// TODO: Use the functions we have in Common/ColorConv.cpp. // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) void ConvertFromBGRA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 055cb93e1ee5..d29ac9184226 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -385,28 +385,7 @@ struct InputLayoutDesc { class InputLayout : public RefCountedObject { }; -enum class UniformType : int8_t { - FLOAT1, - FLOAT2, - FLOAT3, - FLOAT4, - MATRIX4X4, -}; - -// For emulation of uniform buffers on D3D9/GL -struct UniformDesc { - const char *name; // For GL - int16_t vertexReg; // For D3D - int16_t fragmentReg; // For D3D - UniformType type; - int16_t offset; - // TODO: Support array elements etc. -}; - -struct UniformBufferDesc { - size_t uniformBufferSize; - std::vector uniforms; -}; +// Uniform types have moved to Shader.h. class ShaderModule : public RefCountedObject { public: diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index cbfb7a58a3b6..e50f387dc720 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -35,6 +35,7 @@ #include "GPU/Common/PostShader.h" #include "GPU/Common/PresentationCommon.h" #include "GPU/Common/TextureCacheCommon.h" +#include "GPU/Common/ReinterpretFramebuffer.h" #include "GPU/Debugger/Record.h" #include "GPU/Debugger/Stepping.h" #include "GPU/GPUInterface.h" @@ -516,6 +517,59 @@ void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer } } +void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat oldFormat) { + if (!useBufferedRendering_ || !vfb->fbo) { + return; + } + GEBufferFormat newFormat = vfb->format; + + _assert_(newFormat != oldFormat); + + // We only reinterpret between 16 - bit formats, for now. + if (!IsGeBufferFormat16BitColor(oldFormat) || !IsGeBufferFormat16BitColor(newFormat)) { + // 16->32 and 32->16 will require some more specialized shaders. + return; + } + + if (!reinterpretVS_) { + char *buffer = new char[4000]; + const ShaderLanguageDesc &desc = draw_->GetShaderLanguageDesc(); + GenerateReinterpretVertexShader(buffer, desc); + reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, desc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_vs"); + delete[] buffer; + } + + // See if we need to create a new pipeline. + if (!reinterpretFromTo_[(int)oldFormat][(int)newFormat]) { + std::vector shaders; + + using namespace Draw; + Draw::PipelineDesc desc{}; + // We use a "fullscreen triangle". + InputLayoutDesc inputDesc{}; // No inputs, we generate it in the shader. + InputLayout *inputLayout = draw_->CreateInputLayout(inputDesc); + DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS }); + BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF }); + RasterState *rasterNoCull = draw_->CreateRasterState({}); + + // No uniforms for these, only a single texture input. + PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, inputLayout, depth, blendstateOff, rasterNoCull, nullptr }; + Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); + + inputLayout->Release(); + depth->Release(); + blendstateOff->Release(); + rasterNoCull->Release(); + } + + // Copy to a temp framebuffer. + Draw::Framebuffer *temp = GetTempFBO(TempFBO::COPY, vfb->renderWidth, vfb->renderHeight); + + shaderManager_->DirtyLastShader(); + textureCache_->ForgetLastTexture(); + gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); +} + void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) { if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) { ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 26c7e0232360..146a4afb11a5 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -189,7 +189,7 @@ class TextureCacheCommon; class FramebufferManagerCommon { public: - explicit FramebufferManagerCommon(Draw::DrawContext *draw); + FramebufferManagerCommon(Draw::DrawContext *draw); virtual ~FramebufferManagerCommon(); virtual void Init(); @@ -344,7 +344,7 @@ class FramebufferManagerCommon { void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged); void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth); - virtual void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) = 0; + void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst); void ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force = false, bool skipCopy = false); @@ -428,4 +428,10 @@ class FramebufferManagerCommon { FBO_OLD_AGE = 5, FBO_OLD_USAGE_FLAG = 15, }; + + // Thin3D stuff for reinterpreting image data between the various 16-bit formats. + // Safe, not optimal - there might be input attachment tricks, etc, but we can't use them + // since we don't want N different implementations. + Draw::Pipeline *reinterpretFromTo_[3][3]; + Draw::ShaderModule *reinterpretVS_ = nullptr; }; diff --git a/GPU/Common/PresentationCommon.cpp b/GPU/Common/PresentationCommon.cpp index 58233a6d2981..d60dd3447c30 100644 --- a/GPU/Common/PresentationCommon.cpp +++ b/GPU/Common/PresentationCommon.cpp @@ -253,13 +253,13 @@ bool PresentationCommon::BuildPostShader(const ShaderInfo *shaderInfo, const Sha return false; } - Draw::UniformBufferDesc postShaderDesc{ sizeof(PostShaderUniforms), { - { "gl_HalfPixel", 0, -1, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, gl_HalfPixel) }, - { "u_texelDelta", 1, 1, Draw::UniformType::FLOAT2, offsetof(PostShaderUniforms, texelDelta) }, - { "u_pixelDelta", 2, 2, Draw::UniformType::FLOAT2, offsetof(PostShaderUniforms, pixelDelta) }, - { "u_time", 3, 3, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, time) }, - { "u_setting", 4, 4, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) }, - { "u_video", 5, 5, Draw::UniformType::FLOAT1, offsetof(PostShaderUniforms, video) }, + UniformBufferDesc postShaderDesc{ sizeof(PostShaderUniforms), { + { "gl_HalfPixel", 0, -1, UniformType::FLOAT4, offsetof(PostShaderUniforms, gl_HalfPixel) }, + { "u_texelDelta", 1, 1, UniformType::FLOAT2, offsetof(PostShaderUniforms, texelDelta) }, + { "u_pixelDelta", 2, 2, UniformType::FLOAT2, offsetof(PostShaderUniforms, pixelDelta) }, + { "u_time", 3, 3, UniformType::FLOAT4, offsetof(PostShaderUniforms, time) }, + { "u_setting", 4, 4, UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) }, + { "u_video", 5, 5, UniformType::FLOAT1, offsetof(PostShaderUniforms, video) }, } }; Draw::Pipeline *pipeline = CreatePipeline({ vs, fs }, true, &postShaderDesc); if (!pipeline) @@ -366,7 +366,7 @@ void PresentationCommon::DeviceRestore(Draw::DrawContext *draw) { CreateDeviceObjects(); } -Draw::Pipeline *PresentationCommon::CreatePipeline(std::vector shaders, bool postShader, const Draw::UniformBufferDesc *uniformDesc) { +Draw::Pipeline *PresentationCommon::CreatePipeline(std::vector shaders, bool postShader, const UniformBufferDesc *uniformDesc) { using namespace Draw; Semantic pos = SEM_POSITION; diff --git a/GPU/Common/PresentationCommon.h b/GPU/Common/PresentationCommon.h index 959e516d5f57..c3f56699f983 100644 --- a/GPU/Common/PresentationCommon.h +++ b/GPU/Common/PresentationCommon.h @@ -110,7 +110,7 @@ class PresentationCommon { void ShowPostShaderError(const std::string &errorString); Draw::ShaderModule *CompileShaderModule(ShaderStage stage, ShaderLanguage lang, const std::string &src, std::string *errorString); - Draw::Pipeline *CreatePipeline(std::vector shaders, bool postShader, const Draw::UniformBufferDesc *uniformDesc); + Draw::Pipeline *CreatePipeline(std::vector shaders, bool postShader, const UniformBufferDesc *uniformDesc); bool BuildPostShader(const ShaderInfo *shaderInfo, const ShaderInfo *next); bool AllocateFramebuffer(int w, int h); diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp index cf039c0aa6c2..f915d99c79a0 100644 --- a/GPU/Common/ReinterpretFramebuffer.cpp +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -1,8 +1,13 @@ #include +#include "Common/GPU/Shader.h" #include "Common/GPU/ShaderWriter.h" #include "GPU/Common/ReinterpretFramebuffer.h" +static const VaryingDef varyings[1] = { + { "vec2", "v_texcoord", "TEXCOORD0" }, +}; + // TODO: We could have an option to preserve any extra color precision. But gonna start without it. // Requires full size integer math. bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) { @@ -15,10 +20,6 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff writer.DeclareSampler2D("samp", 0); writer.DeclareTexture2D("tex", 0); - static const VaryingDef varyings[1] = { - { "vec4", "v_texcoord", "TEXCOORD0" }, - }; - writer.BeginFSMain(Slice::empty(), varyings); writer.C(" vec4 val = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n"); @@ -62,3 +63,20 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff writer.EndFSMain("outColor"); return true; } + +bool GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang) { + if (!lang.bitwiseOps) { + return false; + } + ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0); + + writer.BeginVSMain(Slice::empty(), Slice::empty(), varyings); + + writer.C(" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"); + writer.C(" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"); + writer.C(" v_texcoord = (vec2(x, y) + vec2(1.0, 1.0)) * 0.5;\n"); + writer.C(" gl_Position = vec4(x, y, 0.0, 1.0);\n"); + + writer.EndVSMain(varyings); + return true; +} diff --git a/GPU/Common/ReinterpretFramebuffer.h b/GPU/Common/ReinterpretFramebuffer.h index f10fb9e3a33b..14b8b042a990 100644 --- a/GPU/Common/ReinterpretFramebuffer.h +++ b/GPU/Common/ReinterpretFramebuffer.h @@ -6,3 +6,7 @@ #include "Common/GPU/ShaderWriter.h" bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang); + +// Just a single one. Can probably be shared with a lot of similar use cases. +// Generates the coordinates for a fullscreen triangle. +bool GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang); diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index 2538f65edb44..97e561555150 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -252,43 +252,6 @@ void FramebufferManagerD3D11::Bind2DShader() { context_->VSSetShader(quadVertexShader_, 0, 0); } -void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) { - if (!useBufferedRendering_ || !vfb->fbo) { - return; - } - - // Technically, we should at this point re-interpret the bytes of the old format to the new. - // That might get tricky, and could cause unnecessary slowness in some games. - // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. - // (it uses 565 to write zeros to the buffer, then 4444 to actually render the shadow.) - // - // The best way to do this may ultimately be to create a new FBO (combine with any resize?) - // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex - // to exactly reproduce in 4444 and 8888 formats. - if (old == GE_FORMAT_565) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer"); - - context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0xFF); - context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[D3D11_COLOR_WRITE_ENABLE_ALPHA], nullptr, 0xFFFFFFFF); - context_->RSSetState(stockD3D11.rasterStateNoCull); - context_->IASetInputLayout(quadInputLayout_); - context_->PSSetShader(quadPixelShader_, nullptr, 0); - context_->VSSetShader(quadVertexShader_, nullptr, 0); - context_->IASetVertexBuffers(0, 1, &fsQuadBuffer_, &quadStride_, &quadOffset_); - context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DClamp); - context_->PSSetShaderResources(0, 1, &nullTextureView_); - shaderManagerD3D11_->DirtyLastShader(); - D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f }; - context_->RSSetViewports(1, &vp); - context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - context_->Draw(4, 0); - - textureCache_->ForgetLastTexture(); - - gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); - } -} - static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) { size_t x = 0; diff --git a/GPU/D3D11/FramebufferManagerD3D11.h b/GPU/D3D11/FramebufferManagerD3D11.h index ec8f1b00e02b..dc2bc15e944c 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.h +++ b/GPU/D3D11/FramebufferManagerD3D11.h @@ -42,7 +42,6 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon { void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) override; void EndFrame(); - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override; virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp index 4f0aafb78d4e..24c8799dc5e1 100644 --- a/GPU/Directx9/FramebufferManagerDX9.cpp +++ b/GPU/Directx9/FramebufferManagerDX9.cpp @@ -228,83 +228,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { device_->SetVertexShader(pFramebufferVertexShader); } - void FramebufferManagerDX9::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) { - if (!useBufferedRendering_ || !vfb->fbo) { - return; - } - - // Technically, we should at this point re-interpret the bytes of the old format to the new. - // That might get tricky, and could cause unnecessary slowness in some games. - // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. - // (it uses 565 to write zeros to the buffer, then 4444 to actually render the shadow.) - // - // The best way to do this may ultimately be to create a new FBO (combine with any resize?) - // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex - // to exactly reproduce in 4444 and 8888 formats. - - if (old == GE_FORMAT_565) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer"); - - dxstate.scissorTest.disable(); - dxstate.depthWrite.set(FALSE); - dxstate.colorMask.set(false, false, false, true); - dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0, 0); - dxstate.stencilMask.set(0xFF); - gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); - - float coord[20] = { - -1.0f,-1.0f,0, 0,0, - 1.0f,-1.0f,0, 0,0, - 1.0f,1.0f,0, 0,0, - -1.0f,1.0f,0, 0,0, - }; - - dxstate.cullMode.set(false, false); - device_->SetVertexDeclaration(pFramebufferVertexDecl); - device_->SetPixelShader(pFramebufferPixelShader); - device_->SetVertexShader(pFramebufferVertexShader); - shaderManagerDX9_->DirtyLastShader(); - device_->SetTexture(0, nullTex_); - - D3DVIEWPORT9 vp{ 0, 0, (DWORD)vfb->renderWidth, (DWORD)vfb->renderHeight, 0.0f, 1.0f }; - device_->SetViewport(&vp); - - // This should clear stencil and alpha without changing the other colors. - HRESULT hr = device_->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); - if (FAILED(hr)) { - ERROR_LOG_REPORT(G3D, "ReformatFramebufferFrom() failed: %08x", hr); - } - dxstate.viewport.restore(); - - textureCache_->ForgetLastTexture(); - } - } - - static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) { - size_t x = 0; - -#ifdef _M_SSE - size_t sseSize = (c / 4) * 4; - const __m128i srcMask = _mm_set1_epi32(0x00FFFFFF); - const __m128i dstMask = _mm_set1_epi32(0xFF000000); - __m128i *dst = (__m128i *)dstp; - const __m128i *src = (const __m128i *)srcp; - - for (; x < sseSize; x += 4) { - const __m128i bits24 = _mm_and_si128(_mm_load_si128(src), srcMask); - const __m128i bits8 = _mm_and_si128(_mm_load_si128(dst), dstMask); - _mm_store_si128(dst, _mm_or_si128(bits24, bits8)); - dst++; - src++; - } -#endif - - // Copy the remaining pixels that didn't fit in SSE. - for (; x < c; ++x) { - memcpy(dstp + x, srcp + x, 3); - } - } - LPDIRECT3DSURFACE9 FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface, VirtualFramebuffer *vfb) { D3DSURFACE_DESC desc = {}; HRESULT hr = similarSurface->GetDesc(&desc); diff --git a/GPU/Directx9/FramebufferManagerDX9.h b/GPU/Directx9/FramebufferManagerDX9.h index a35a154265e2..ac1dbaa484c7 100644 --- a/GPU/Directx9/FramebufferManagerDX9.h +++ b/GPU/Directx9/FramebufferManagerDX9.h @@ -47,7 +47,6 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { void DestroyAllFBOs(); void EndFrame(); - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override; virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index befbe06177a1..a659377d6687 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -242,27 +242,6 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float } } -void FramebufferManagerGLES::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) { - if (!useBufferedRendering_ || !vfb->fbo) { - return; - } - - // Technically, we should at this point re-interpret the bytes of the old format to the new. - // That might get tricky, and could cause unnecessary slowness in some games. - // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. - // (it uses 565 to write zeros to the buffer, then 4444 to actually render the shadow.) - // - // The best way to do this may ultimately be to create a new FBO (combine with any resize?) - // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex - // to exactly reproduce in 4444 and 8888 formats. - - if (old == GE_FORMAT_565) { - // Clear alpha and stencil. - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer"); - render_->Clear(0, 0.0f, 0, GL_COLOR_BUFFER_BIT, 0x8, 0, 0, 0, 0); - } -} - void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { _assert_msg_(nvfb->fbo, "Expecting a valid nvfb in UpdateDownloadTempBuffer"); diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index 56ad6aa30789..72c91f4b9ba0 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -50,8 +50,6 @@ class FramebufferManagerGLES : public FramebufferManagerCommon { void DeviceLost() override; void DeviceRestore(Draw::DrawContext *draw) override; - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override; - bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override; diff --git a/GPU/Vulkan/FramebufferManagerVulkan.cpp b/GPU/Vulkan/FramebufferManagerVulkan.cpp index a4e5873f67f0..c293c1b5b9a8 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.cpp +++ b/GPU/Vulkan/FramebufferManagerVulkan.cpp @@ -242,32 +242,6 @@ int FramebufferManagerVulkan::GetLineWidth() { } } -// This also binds vfb as the current render target. -void FramebufferManagerVulkan::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) { - if (!useBufferedRendering_ || !vfb->fbo) { - return; - } - - // Technically, we should at this point re-interpret the bytes of the old format to the new. - // That might get tricky, and could cause unnecessary slowness in some games. - // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. - // (it uses 565 to write zeros to the buffer, then 4444 to actually render the shadow.) - // - // The best way to do this may ultimately be to create a new FBO (combine with any resize?) - // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex - // to exactly reproduce in 4444 and 8888 formats. - - if (old == GE_FORMAT_565) { - // We have to bind here instead of clear, since it can be that no framebuffer is bound. - // The backend can sometimes directly optimize it to a clear. - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer"); - - // Need to dirty anything that has command buffer dynamic state, in case we started a new pass above. - // Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager. - gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE); - } -} - void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { // Nothing to do here. } diff --git a/GPU/Vulkan/FramebufferManagerVulkan.h b/GPU/Vulkan/FramebufferManagerVulkan.h index aaa1d48297f9..d8b20e3826b3 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.h +++ b/GPU/Vulkan/FramebufferManagerVulkan.h @@ -53,7 +53,6 @@ class FramebufferManagerVulkan : public FramebufferManagerCommon { void DeviceRestore(Draw::DrawContext *draw) override; int GetLineWidth(); - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override; bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 5facb4ad0523..4f4c71154844 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -287,6 +287,9 @@ enum GEBufferFormat }; const char *GeBufferFormatToString(GEBufferFormat fmt); +inline bool IsGeBufferFormat16BitColor(GEBufferFormat fmt) { + return (int)fmt < 3; +} #define GE_VTYPE_TRANSFORM (0<<23) #define GE_VTYPE_THROUGH (1<<23) diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 35dcde11043e..46fa21730578 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -169,6 +169,23 @@ bool TestReinterpretShaders() { // Generate all despite failures - it's only 6. bool failed = false; + for (int k = 0; k < ARRAY_SIZE(languages); k++) { + ShaderLanguageDesc desc(languages[k]); + if (!GenerateReinterpretVertexShader(buffer, desc)) { + printf("Failed!\n%s\n", buffer); + failed = true; + } else { + std::string errorMessage; + if (!TestCompileShader(buffer, languages[k], true, &errorMessage)) { + printf("Error compiling fragment shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str()); + failed = true; + return false; + } else { + printf("===\n%s\n===\n", buffer); + } + } + } + for (int k = 0; k < ARRAY_SIZE(languages); k++) { printf("=== %s ===\n\n", ShaderLanguageToString(languages[k])); From 1ccc8c129c708935266311712253f0129780c967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 6 Nov 2020 09:55:16 +0100 Subject: [PATCH 3/9] Reinterpret code runs, no idea if it works --- Common/GPU/D3D11/thin3d_d3d11.cpp | 2 +- Common/GPU/OpenGL/thin3d_gl.cpp | 29 ++++++++++---- GPU/Common/FramebufferManagerCommon.cpp | 51 ++++++++++++++++++++----- GPU/Common/FramebufferManagerCommon.h | 3 +- 4 files changed, 66 insertions(+), 19 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 21db0ab5cb9f..3605c1725dc5 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -680,7 +680,7 @@ class D3D11Pipeline : public Pipeline { if (dynamicUniforms) dynamicUniforms->Release(); } - bool RequiresBuffer() { + bool RequiresBuffer() override { return true; } diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 4e9b5b4f9232..e789c987df3c 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -307,7 +307,7 @@ class OpenGLPipeline : public Pipeline { bool LinkShaders(); bool RequiresBuffer() override { - return inputLayout->RequiresBuffer(); + return inputLayout && inputLayout->RequiresBuffer(); } GLuint prim = 0; @@ -1051,7 +1051,7 @@ Pipeline *OpenGLContext::CreateGraphicsPipeline(const PipelineDesc &desc) { ERROR_LOG(G3D, "Invalid primitive type"); return nullptr; } - if (!desc.depthStencil || !desc.blend || !desc.raster || !desc.inputLayout) { + if (!desc.depthStencil || !desc.blend || !desc.raster) { ERROR_LOG(G3D, "Incomplete prim desciption"); return nullptr; } @@ -1081,7 +1081,9 @@ Pipeline *OpenGLContext::CreateGraphicsPipeline(const PipelineDesc &desc) { pipeline->depthStencil->AddRef(); pipeline->blend->AddRef(); pipeline->raster->AddRef(); - pipeline->inputLayout->AddRef(); + if (pipeline->inputLayout) { + pipeline->inputLayout->AddRef(); + } return pipeline; } else { ERROR_LOG(G3D, "Failed to create pipeline - shaders failed to link"); @@ -1219,7 +1221,9 @@ void OpenGLContext::UpdateDynamicUniformBuffer(const void *ub, size_t size) { void OpenGLContext::Draw(int vertexCount, int offset) { _dbg_assert_msg_(curVBuffers_[0], "Can't call Draw without a vertex buffer"); ApplySamplers(); - renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0]); + if (curPipeline_->inputLayout) { + renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0]); + } renderManager_.Draw(curPipeline_->prim, offset, vertexCount); } @@ -1227,12 +1231,15 @@ void OpenGLContext::DrawIndexed(int vertexCount, int offset) { _dbg_assert_msg_(curVBuffers_[0], "Can't call DrawIndexed without a vertex buffer"); _dbg_assert_msg_(curIBuffer_, "Can't call DrawIndexed without an index buffer"); ApplySamplers(); - renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0]); + if (curPipeline_->inputLayout) { + renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0]); + } renderManager_.BindIndexBuffer(curIBuffer_->buffer_); renderManager_.DrawIndexed(curPipeline_->prim, vertexCount, GL_UNSIGNED_SHORT, (void *)((intptr_t)curIBufferOffset_ + offset * sizeof(uint32_t))); } void OpenGLContext::DrawUP(const void *vdata, int vertexCount) { + _assert_(curPipeline_->inputLayout); int stride = curPipeline_->inputLayout->stride; size_t dataSize = stride * vertexCount; @@ -1242,7 +1249,9 @@ void OpenGLContext::DrawUP(const void *vdata, int vertexCount) { size_t offset = frameData.push->Push(vdata, dataSize, &buf); ApplySamplers(); - renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, buf, offset); + if (curPipeline_->inputLayout) { + renderManager_.BindVertexBuffer(curPipeline_->inputLayout->inputLayout_, buf, offset); + } renderManager_.Draw(curPipeline_->prim, 0, vertexCount); } @@ -1273,7 +1282,7 @@ OpenGLInputLayout::~OpenGLInputLayout() { void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) { // TODO: This is only accurate if there's only one stream. But whatever, for now we // never use multiple streams anyway. - stride = (GLsizei)desc.bindings[0].stride; + stride = desc.bindings.empty() ? 0 : (GLsizei)desc.bindings[0].stride; std::vector entries; for (auto &attr : desc.attributes) { @@ -1310,7 +1319,11 @@ void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) { entries.push_back(entry); } - inputLayout_ = render_->CreateInputLayout(entries); + if (!entries.empty()) { + inputLayout_ = render_->CreateInputLayout(entries); + } else { + inputLayout_ = nullptr; + } } Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) { diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e50f387dc720..a21585b138af 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -533,40 +533,73 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, if (!reinterpretVS_) { char *buffer = new char[4000]; - const ShaderLanguageDesc &desc = draw_->GetShaderLanguageDesc(); - GenerateReinterpretVertexShader(buffer, desc); - reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, desc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_vs"); + const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); + GenerateReinterpretVertexShader(buffer, shaderLanguageDesc); + reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_vs"); + _assert_(reinterpretVS_); delete[] buffer; } + if (!reinterpretSampler_) { + Draw::SamplerStateDesc samplerDesc{}; + samplerDesc.magFilter = Draw::TextureFilter::LINEAR; + samplerDesc.minFilter = Draw::TextureFilter::LINEAR; + reinterpretSampler_ = draw_->CreateSamplerState(samplerDesc); + } + // See if we need to create a new pipeline. - if (!reinterpretFromTo_[(int)oldFormat][(int)newFormat]) { + + Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat]; + if (!pipeline) { + char *buffer = new char[4000]; + const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); + GenerateReinterpretFragmentShader(buffer, oldFormat, newFormat, shaderLanguageDesc); + Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_fs"); + _assert_(reinterpretFS); + delete[] buffer; + std::vector shaders; + shaders.push_back(reinterpretVS_); + shaders.push_back(reinterpretFS); using namespace Draw; Draw::PipelineDesc desc{}; // We use a "fullscreen triangle". - InputLayoutDesc inputDesc{}; // No inputs, we generate it in the shader. - InputLayout *inputLayout = draw_->CreateInputLayout(inputDesc); + // TODO: clear the stencil buffer. Hard to actually initialize it with the new alpha, though possible - let's see if + // we need it. DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS }); BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF }); RasterState *rasterNoCull = draw_->CreateRasterState({}); // No uniforms for these, only a single texture input. - PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, inputLayout, depth, blendstateOff, rasterNoCull, nullptr }; - Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); + PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, nullptr, depth, blendstateOff, rasterNoCull, nullptr }; + pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); + _assert_(pipeline != nullptr); + reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline; - inputLayout->Release(); depth->Release(); blendstateOff->Release(); rasterNoCull->Release(); + reinterpretFS->Release(); } // Copy to a temp framebuffer. Draw::Framebuffer *temp = GetTempFBO(TempFBO::COPY, vfb->renderWidth, vfb->renderHeight); + draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep"); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "reinterpret"); + draw_->BindPipeline(pipeline); + draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0); + draw_->BindSamplerStates(0, 1, &reinterpretSampler_); + draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight); + Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f }; + draw_->SetViewports(1, &vp); + draw_->Draw(3, 0); + draw_->InvalidateCachedState(); + shaderManager_->DirtyLastShader(); textureCache_->ForgetLastTexture(); + gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 146a4afb11a5..48f8d384fe12 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -432,6 +432,7 @@ class FramebufferManagerCommon { // Thin3D stuff for reinterpreting image data between the various 16-bit formats. // Safe, not optimal - there might be input attachment tricks, etc, but we can't use them // since we don't want N different implementations. - Draw::Pipeline *reinterpretFromTo_[3][3]; + Draw::Pipeline *reinterpretFromTo_[3][3]{}; Draw::ShaderModule *reinterpretVS_ = nullptr; + Draw::SamplerState *reinterpretSampler_ = nullptr; }; From 981d0a2abe6baa7604933d1c97c9fa501caf415d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 6 Nov 2020 11:54:57 +0100 Subject: [PATCH 4/9] Reinterpret the data when binding a framebuffer with a different 16-bit format. Car reflections in Outrun are better (see #11358) but have some blue/yellow color garbage that will need a different fix. --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- GPU/Common/FramebufferManagerCommon.h | 4 +++- GPU/Common/TextureCacheCommon.cpp | 26 +++++++++++++++++---- GPU/Common/TextureCacheCommon.h | 2 ++ GPU/ge_constants.h | 31 +++++++++++++++++++++---- 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index a21585b138af..56a358512486 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -584,7 +584,7 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, } // Copy to a temp framebuffer. - Draw::Framebuffer *temp = GetTempFBO(TempFBO::COPY, vfb->renderWidth, vfb->renderHeight); + Draw::Framebuffer *temp = GetTempFBO(TempFBO::REINTERPRET, vfb->renderWidth, vfb->renderHeight); draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep"); draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "reinterpret"); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 48f8d384fe12..9f35afce116f 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -158,6 +158,8 @@ enum class TempFBO { BLIT, // For copies of framebuffers (e.g. shader blending.) COPY, + // For another type of framebuffers that can happen together with COPY (see Outrun) + REINTERPRET, // Used to copy stencil data, means we need a stencil backing. STENCIL, }; @@ -320,6 +322,7 @@ class FramebufferManagerCommon { const std::vector &Framebuffers() { return vfbs_; } + void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); protected: virtual void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h); @@ -344,7 +347,6 @@ class FramebufferManagerCommon { void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged); void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth); - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst); void ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force = false, bool skipCopy = false); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1c5c63cdceb7..31189408ae41 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -486,9 +486,10 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { DeleteTexture(entryIter); } + const AttachCandidate &candidate = candidates[index]; nextTexture_ = nullptr; nextNeedsRebuild_ = false; - SetTextureFramebuffer(candidates[index]); + SetTextureFramebuffer(candidate); return nullptr; } } @@ -849,11 +850,19 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer( WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride); } // NOTE: This check is okay because the first texture formats are the same as the buffer formats. - if (entry.format != (GETextureFormat)framebuffer->format) { - WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with different formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); - return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; + if (IsTextureFormatBufferCompatible(entry.format)) { + if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format)) { + return FramebufferMatchInfo{ FramebufferMatch::VALID }; + } else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) { + WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); + return FramebufferMatchInfo{ FramebufferMatch::VALID, 0, 0, true, TextureFormatToBufferFormat(entry.format) }; + } else { + WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); + return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; + } } else { - return FramebufferMatchInfo{ FramebufferMatch::VALID }; + // Format incompatible, ignoring without comment. (maybe some really gnarly hacks will end up here...) + return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } } else { // Apply to buffered mode only. @@ -944,6 +953,13 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) VirtualFramebuffer *framebuffer = candidate.fb; FramebufferMatchInfo fbInfo = candidate.match; + if (candidate.match.reinterpret) { + // TODO: Kinda ugly, maybe switch direction of the call? + GEBufferFormat oldFormat = candidate.fb->format; + candidate.fb->format = candidate.match.reinterpretTo; + framebufferManager_->ReformatFramebufferFrom(candidate.fb, oldFormat); + } + _dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null."); framebuffer->usageFlags |= FB_USAGE_TEXTURE; diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 1b0300724b99..4ffa7f944826 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -212,6 +212,8 @@ struct FramebufferMatchInfo { FramebufferMatch match; u32 xOffset; u32 yOffset; + bool reinterpret; + GEBufferFormat reinterpretTo; }; struct AttachCandidate { diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 4f4c71154844..92c06c3f1d5e 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -419,12 +419,33 @@ enum GETextureFormat GE_TFMT_DXT5 = 10, }; -const char *GeTextureFormatToString(GETextureFormat fmt); -inline bool IsClutFormat(GETextureFormat fmt) { - return fmt == GE_TFMT_CLUT4 || fmt == GE_TFMT_CLUT8 || fmt == GE_TFMT_CLUT16 || fmt == GE_TFMT_CLUT32; +const char *GeTextureFormatToString(GETextureFormat tfmt); +inline bool IsClutFormat(GETextureFormat tfmt) { + return tfmt == GE_TFMT_CLUT4 || tfmt == GE_TFMT_CLUT8 || tfmt == GE_TFMT_CLUT16 || tfmt == GE_TFMT_CLUT32; } -inline bool IsDXTFormat(GETextureFormat fmt) { - return fmt == GE_TFMT_DXT1 || fmt == GE_TFMT_DXT3 || fmt == GE_TFMT_DXT5; +inline bool IsDXTFormat(GETextureFormat tfmt) { + return tfmt == GE_TFMT_DXT1 || tfmt == GE_TFMT_DXT3 || tfmt == GE_TFMT_DXT5; +} +inline bool IsTextureFormatBufferCompatible(GETextureFormat tfmt) { + return (int)tfmt < 4; +} +inline bool IsBufferFormat16Bit(GEBufferFormat bfmt) { + return (int)bfmt < 3; +} +inline bool IsTextureFormat16Bit(GETextureFormat tfmt) { + return (int)tfmt < 3; +} +inline bool TextureFormatMatchesBufferFormat(GETextureFormat fmt, GEBufferFormat bfmt) { + // First four matches perfectly. + if ((int)fmt < 4) { + return (int)fmt == (int)bfmt; + } else { + return false; + } +} +// only applicable if IsTextureFormatBufferCompatible(fmt) +inline GEBufferFormat TextureFormatToBufferFormat(GETextureFormat bfmt) { + return (GEBufferFormat)(int)bfmt; } enum GETexLevelMode { From 96c36d5c10faf7b432680079b77c71365c0ae9f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 6 Nov 2020 20:08:57 +0100 Subject: [PATCH 5/9] More work on reinterpret. Get Vulkan running --- Common/GPU/D3D11/thin3d_d3d11.cpp | 31 ++++++++++++++++++------- Common/GPU/ShaderWriter.cpp | 5 ++-- Common/GPU/Vulkan/thin3d_vulkan.cpp | 12 +++++++--- GPU/Common/FramebufferManagerCommon.cpp | 24 ++++++++++++------- GPU/Common/ShaderId.h | 5 ++-- 5 files changed, 53 insertions(+), 24 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 3605c1725dc5..608baeb38b69 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -664,6 +664,8 @@ InputLayout *D3D11DrawContext::CreateInputLayout(const InputLayoutDesc &desc) { return inputLayout; } +class D3D11ShaderModule; + class D3D11Pipeline : public Pipeline { public: ~D3D11Pipeline() { @@ -694,6 +696,8 @@ class D3D11Pipeline : public Pipeline { ID3D11GeometryShader *gs = nullptr; D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + std::vector shaderModules; + size_t dynamicUniformsSize = 0; ID3D11Buffer *dynamicUniforms = nullptr; }; @@ -966,7 +970,9 @@ Pipeline *D3D11DrawContext::CreateGraphicsPipeline(const PipelineDesc &desc) { dPipeline->raster = (D3D11RasterState *)desc.raster; dPipeline->blend->AddRef(); dPipeline->depth->AddRef(); - dPipeline->input->AddRef(); + if (dPipeline->input) { + dPipeline->input->AddRef(); + } dPipeline->raster->AddRef(); dPipeline->topology = primToD3D11[(int)desc.prim]; if (desc.uniformDesc) { @@ -983,6 +989,8 @@ Pipeline *D3D11DrawContext::CreateGraphicsPipeline(const PipelineDesc &desc) { std::vector shaders; D3D11ShaderModule *vshader = nullptr; for (auto iter : desc.shaders) { + iter->AddRef(); + D3D11ShaderModule *module = (D3D11ShaderModule *)iter; shaders.push_back(module); switch (module->GetStage()) { @@ -998,6 +1006,7 @@ Pipeline *D3D11DrawContext::CreateGraphicsPipeline(const PipelineDesc &desc) { break; } } + dPipeline->shaderModules = shaders; if (!vshader) { // No vertex shader - no graphics @@ -1006,11 +1015,15 @@ Pipeline *D3D11DrawContext::CreateGraphicsPipeline(const PipelineDesc &desc) { } // Can finally create the input layout - auto &inputDesc = dPipeline->input->desc; - const std::vector &elements = dPipeline->input->elements; - HRESULT hr = device_->CreateInputLayout(elements.data(), (UINT)elements.size(), vshader->byteCode_.data(), vshader->byteCode_.size(), &dPipeline->il); - if (!SUCCEEDED(hr)) { - Crash(); + if (dPipeline->input) { + auto &inputDesc = dPipeline->input->desc; + const std::vector &elements = dPipeline->input->elements; + HRESULT hr = device_->CreateInputLayout(elements.data(), (UINT)elements.size(), vshader->byteCode_.data(), vshader->byteCode_.size(), &dPipeline->il); + if (!SUCCEEDED(hr)) { + Crash(); + } + } else { + dPipeline->il = nullptr; } return dPipeline; } @@ -1081,8 +1094,10 @@ void D3D11DrawContext::ApplyCurrentState() { curTopology_ = curPipeline_->topology; } - int numVBs = (int)curPipeline_->input->strides.size(); - context_->IASetVertexBuffers(0, 1, nextVertexBuffers_, (UINT *)curPipeline_->input->strides.data(), (UINT *)nextVertexBufferOffsets_); + if (curPipeline_->input) { + int numVBs = (int)curPipeline_->input->strides.size(); + context_->IASetVertexBuffers(0, numVBs, nextVertexBuffers_, (UINT *)curPipeline_->input->strides.data(), (UINT *)nextVertexBufferOffsets_); + } if (dirtyIndexBuffer_) { context_->IASetIndexBuffer(nextIndexBuffer_, DXGI_FORMAT_R16_UINT, nextIndexBufferOffset_); dirtyIndexBuffer_ = false; diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 866b879e0d48..2e05c56f799a 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -141,10 +141,10 @@ void ShaderWriter::BeginVSMain(Slice inputs, Slice uniform case HLSL_D3D9: { C("struct VS_OUTPUT {\n"); - C(" vec4 pos : POSITION;\n"); for (auto &varying : varyings) { F(" %s %s : %s;\n", varying.type, varying.name, varying.semantic); } + F(" vec4 pos : %s;\n", lang_.shaderLanguage == HLSL_D3D11 ? "SV_Position" : "POSITION"); C("};\n"); C("VS_OUTPUT main( "); // 2 spaces for the D3D9 rewind @@ -268,7 +268,8 @@ void ShaderWriter::DeclareTexture2D(const char *name, int binding) { case HLSL_D3D9: break; case GLSL_VULKAN: - F("layout(set = 0, binding = %d) uniform sampler2D %s;\n", binding, name); + // In the thin3d descriptor set layout, textures start at 1 in set 0. Hence the +1. + F("layout(set = 0, binding = %d) uniform sampler2D %s;\n", binding + 1, name); break; default: F("uniform sampler2D %s;\n", name); diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index e59f88d42cee..4fc289023097 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -1030,8 +1030,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc) { VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags); - for (int i = 0; i < (int)input->bindings.size(); i++) { - pipeline->stride[i] = input->bindings[i].stride; + if (input) { + for (int i = 0; i < (int)input->bindings.size(); i++) { + pipeline->stride[i] = input->bindings[i].stride; + } + } else { + pipeline->stride[0] = 0; } std::vector stages; @@ -1076,6 +1080,8 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc) { VkPipelineRasterizationStateCreateInfo rs{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO }; raster->ToVulkan(&rs); + VkPipelineVertexInputStateCreateInfo emptyVisc{ VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; + VkGraphicsPipelineCreateInfo createInfo[2]{}; for (auto &info : createInfo) { info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -1088,7 +1094,7 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc) { info.pInputAssemblyState = &inputAssembly; info.pTessellationState = nullptr; info.pMultisampleState = &ms; - info.pVertexInputState = &input->visc; + info.pVertexInputState = input ? &input->visc : &emptyVisc; info.pRasterizationState = &rs; info.pViewportState = &vs; // Must set viewport and scissor counts even if we set the actual state dynamically. info.layout = pipelineLayout_; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 56a358512486..7ad9135e5713 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -531,13 +531,15 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, return; } + char *vsCode = nullptr; + char *fsCode = nullptr; + if (!reinterpretVS_) { - char *buffer = new char[4000]; + vsCode = new char[4000]; const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); - GenerateReinterpretVertexShader(buffer, shaderLanguageDesc); - reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_vs"); + GenerateReinterpretVertexShader(vsCode, shaderLanguageDesc); + reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "reinterpret_vs"); _assert_(reinterpretVS_); - delete[] buffer; } if (!reinterpretSampler_) { @@ -551,12 +553,11 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat]; if (!pipeline) { - char *buffer = new char[4000]; + fsCode = new char[4000]; const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); - GenerateReinterpretFragmentShader(buffer, oldFormat, newFormat, shaderLanguageDesc); - Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "reinterpret_fs"); + GenerateReinterpretFragmentShader(fsCode, oldFormat, newFormat, shaderLanguageDesc); + Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "reinterpret_fs"); _assert_(reinterpretFS); - delete[] buffer; std::vector shaders; shaders.push_back(reinterpretVS_); @@ -597,10 +598,17 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, draw_->Draw(3, 0); draw_->InvalidateCachedState(); + // Unbind. + draw_->BindTexture(0, nullptr); + RebindFramebuffer("RebindFramebuffer - After reinterpret"); + shaderManager_->DirtyLastShader(); textureCache_->ForgetLastTexture(); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); + + delete[] vsCode; + delete[] fsCode; } void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) { diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 1c8c5f93d418..f28dd588ba97 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -82,9 +82,9 @@ enum FShaderBit : uint8_t { FS_BIT_DO_TEXTURE_PROJ = 22, FS_BIT_COLOR_DOUBLE = 23, FS_BIT_STENCIL_TO_ALPHA = 24, // 2 bits - FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE = 26, // 4 bits + FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE = 26, // 4 bits (ReplaceAlphaType) FS_BIT_REPLACE_LOGIC_OP_TYPE = 30, // 2 bits - FS_BIT_REPLACE_BLEND = 32, // 3 bits + FS_BIT_REPLACE_BLEND = 32, // 3 bits (ReplaceBlendType) FS_BIT_BLENDEQ = 35, // 3 bits FS_BIT_BLENDFUNC_A = 38, // 4 bits FS_BIT_BLENDFUNC_B = 42, // 4 bits @@ -92,7 +92,6 @@ enum FShaderBit : uint8_t { FS_BIT_BGRA_TEXTURE = 47, FS_BIT_TEST_DISCARD_TO_ZERO = 48, FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, - // 50+ are free. }; static inline FShaderBit operator +(FShaderBit bit, int i) { From 28f8578408f01c010377c4bf210ead11a449844a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 7 Nov 2020 11:20:22 +0100 Subject: [PATCH 6/9] Cleanup reinterpret shader resources in FramebufferManagerCommon::DeviceLost(). --- GPU/Common/FramebufferManagerCommon.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 7ad9135e5713..822a9004d264 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2244,6 +2244,16 @@ std::vector FramebufferManagerCommon::GetFramebufferList() { void FramebufferManagerCommon::DeviceLost() { DestroyAllFBOs(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + if (reinterpretFromTo_[i][j]) { + reinterpretFromTo_[i][j]->Release(); + reinterpretFromTo_[i][j] = nullptr; + } + } + } + reinterpretSampler_->Release(); + reinterpretVS_->Release(); presentation_->DeviceLost(); draw_ = nullptr; } From 4e16fcaf1a3594825b383d095773ad4c849e7153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 7 Nov 2020 11:53:48 +0100 Subject: [PATCH 7/9] Fix reinterpret shader for D3D11 --- Common/GPU/Shader.cpp | 1 + Common/GPU/Shader.h | 1 + GPU/Common/ReinterpretFramebuffer.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/Common/GPU/Shader.cpp b/Common/GPU/Shader.cpp index b199f0a22722..9eea402f6791 100644 --- a/Common/GPU/Shader.cpp +++ b/Common/GPU/Shader.cpp @@ -74,6 +74,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { forceMatrix4x4 = false; coefsFromBuffers = true; vsOutPrefix = "Out."; + viewportYSign = "-"; break; } } diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h index 69fa5c9d6513..07f678e0a5ca 100644 --- a/Common/GPU/Shader.h +++ b/Common/GPU/Shader.h @@ -45,6 +45,7 @@ struct ShaderLanguageDesc { const char *lastFragData = nullptr; const char *framebufferFetchExtension = nullptr; const char *vsOutPrefix = ""; + const char *viewportYSign = ""; bool glslES30 = false; bool bitwiseOps = false; bool forceMatrix4x4 = false; diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp index f915d99c79a0..2fff2bb5f71b 100644 --- a/GPU/Common/ReinterpretFramebuffer.cpp +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -75,6 +75,7 @@ bool GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lan writer.C(" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"); writer.C(" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"); writer.C(" v_texcoord = (vec2(x, y) + vec2(1.0, 1.0)) * 0.5;\n"); + writer.F(" y *= %s1.0;\n", lang.viewportYSign); writer.C(" gl_Position = vec4(x, y, 0.0, 1.0);\n"); writer.EndVSMain(varyings); From 910524990183843c6be7d9b9de0c9278ae7e8173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 7 Nov 2020 23:20:34 +0100 Subject: [PATCH 8/9] Add compat flag for reinterpret shader, also disable on platforms that can't support it yet --- Common/GPU/Shader.cpp | 2 +- Common/GPU/Shader.h | 2 +- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + GPU/Common/FramebufferManagerCommon.cpp | 28 ++++++++++++++++++++----- GPU/Common/FramebufferManagerCommon.h | 2 +- GPU/Common/TextureCacheCommon.cpp | 2 +- GPU/D3D11/TextureCacheD3D11.cpp | 5 ++--- assets/compat.ini | 8 +++++++ 9 files changed, 39 insertions(+), 12 deletions(-) diff --git a/Common/GPU/Shader.cpp b/Common/GPU/Shader.cpp index 9eea402f6791..43943449cc91 100644 --- a/Common/GPU/Shader.cpp +++ b/Common/GPU/Shader.cpp @@ -66,7 +66,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) { bitwiseOps = lang == HLSL_D3D11; framebufferFetchExtension = nullptr; gles = false; - glslES30 = true; + glslES30 = true; // Hm, D3D9 too? glslVersionNumber = 0; lastFragData = nullptr; texture = "texture"; diff --git a/Common/GPU/Shader.h b/Common/GPU/Shader.h index 07f678e0a5ca..8a863f3c7f40 100644 --- a/Common/GPU/Shader.h +++ b/Common/GPU/Shader.h @@ -46,7 +46,7 @@ struct ShaderLanguageDesc { const char *framebufferFetchExtension = nullptr; const char *vsOutPrefix = ""; const char *viewportYSign = ""; - bool glslES30 = false; + bool glslES30 = false; // really glslES30Features. TODO: Clean this up. bool bitwiseOps = false; bool forceMatrix4x4 = false; bool coefsFromBuffers = false; diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index a965f1b06264..10b2c42896cc 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -74,6 +74,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "ReportSmallMemstick", &flags_.ReportSmallMemstick); CheckSetting(iniFile, gameID, "MemstickFixedFree", &flags_.MemstickFixedFree); CheckSetting(iniFile, gameID, "DateLimited", &flags_.DateLimited); + CheckSetting(iniFile, gameID, "ReinterpretFramebuffers", &flags_.ReinterpretFramebuffers); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 1d5dfa6372a7..e390e28f424f 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -72,6 +72,7 @@ struct CompatFlags { bool ReportSmallMemstick; bool MemstickFixedFree; bool DateLimited; + bool ReinterpretFramebuffers; }; class IniFile; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 822a9004d264..2a6445e6f1a9 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -503,7 +503,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer if (vfbFormatChanged) { textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED); if (vfb->drawnFormat != vfb->format) { - ReformatFramebufferFrom(vfb, vfb->drawnFormat); + ReinterpretFramebufferFrom(vfb, vfb->drawnFormat); } } @@ -517,10 +517,28 @@ void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer } } -void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat oldFormat) { +void FramebufferManagerCommon::ReinterpretFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat oldFormat) { if (!useBufferedRendering_ || !vfb->fbo) { return; } + + ShaderLanguage lang = draw_->GetShaderLanguageDesc().shaderLanguage; + + bool doReinterpret = PSP_CoreParameter().compat.flags().ReinterpretFramebuffers && + (lang == HLSL_D3D11 || lang == GLSL_VULKAN || lang == GLSL_3xx); + if (!doReinterpret) { + // Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil. + if (oldFormat == GE_FORMAT_565) { + // We have to bind here instead of clear, since it can be that no framebuffer is bound. + // The backend can sometimes directly optimize it to a clear. + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret"); + // Need to dirty anything that has command buffer dynamic state, in case we started a new pass above. + // Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager. + gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE); + } + return; + } + GEBufferFormat newFormat = vfb->format; _assert_(newFormat != oldFormat); @@ -595,12 +613,13 @@ void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight); Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f }; draw_->SetViewports(1, &vp); + // No vertex buffer - generate vertices in shader. TODO: Switch to a vertex buffer for GLES2/D3D9 compat. draw_->Draw(3, 0); draw_->InvalidateCachedState(); // Unbind. draw_->BindTexture(0, nullptr); - RebindFramebuffer("RebindFramebuffer - After reinterpret"); + RebindFramebuffer("After reinterpret"); shaderManager_->DirtyLastShader(); textureCache_->ForgetLastTexture(); @@ -639,8 +658,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } } if (vfb->drawnFormat != vfb->format) { - // TODO: Might ultimately combine this with the resize step in DoSetRenderFrameBuffer(). - ReformatFramebufferFrom(vfb, vfb->drawnFormat); + ReinterpretFramebufferFrom(vfb, vfb->drawnFormat); } if (useBufferedRendering_) { diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 9f35afce116f..04422c7970db 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -322,7 +322,7 @@ class FramebufferManagerCommon { const std::vector &Framebuffers() { return vfbs_; } - void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); + void ReinterpretFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); protected: virtual void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 31189408ae41..dd67d94ab186 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -957,7 +957,7 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) // TODO: Kinda ugly, maybe switch direction of the call? GEBufferFormat oldFormat = candidate.fb->format; candidate.fb->format = candidate.match.reinterpretTo; - framebufferManager_->ReformatFramebufferFrom(candidate.fb, oldFormat); + framebufferManager_->ReinterpretFramebufferFrom(candidate.fb, oldFormat); } _dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null."); diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index b315d27a8363..044f5113c301 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -421,10 +421,9 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); } else { - framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); - gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); - framebufferManagerD3D11_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer"); // Probably not necessary. + framebufferManagerD3D11_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer"); + framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); } SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight); diff --git a/assets/compat.ini b/assets/compat.ini index 1bda7f4fac54..6cd40f5bc3cb 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -794,3 +794,11 @@ ULUS10455 = true # Car Jack Streets - issue #12698 NPUZ00043 = true NPEZ00198 = true + +# This setting will go away in the near future, hopefully we can enable it +# for all games. +[ReinterpretFramebuffers] +# Outrun - issue #11358 +ULES00262 = true +ULUS10064 = true +ULKS46087 = true From d81522af11b9c91bf559e63595a5214cbac410db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 8 Nov 2020 10:26:27 +0100 Subject: [PATCH 9/9] Address feedback. --- GPU/Common/ReinterpretFramebuffer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/GPU/Common/ReinterpretFramebuffer.cpp b/GPU/Common/ReinterpretFramebuffer.cpp index 2fff2bb5f71b..8c824a9b44ee 100644 --- a/GPU/Common/ReinterpretFramebuffer.cpp +++ b/GPU/Common/ReinterpretFramebuffer.cpp @@ -8,8 +8,9 @@ static const VaryingDef varyings[1] = { { "vec2", "v_texcoord", "TEXCOORD0" }, }; -// TODO: We could have an option to preserve any extra color precision. But gonna start without it. -// Requires full size integer math. +// TODO: We could possibly have an option to preserve any extra color precision? But gonna start without it. +// Requires full size integer math. It would be possible to make a floating point-only version with lots of +// modulo and stuff, might do it one day. bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) { if (!lang.bitwiseOps) { return false; @@ -30,7 +31,7 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff break; case GE_FORMAT_5551: writer.C(" uint color = uint(val.r * 31.99) | (uint(val.g * 31.99) << 5) | (uint(val.b * 31.99) << 10);\n"); - writer.C(" if (val.a > 128.0) color |= 0x8000U;\n"); + writer.C(" if (val.a >= 0.5) color |= 0x8000U;\n"); break; case GE_FORMAT_565: writer.C(" uint color = uint(val.r * 31.99) | (uint(val.g * 63.99) << 5) | (uint(val.b * 31.99) << 11);\n");