From 6310af25fa02d7174910a822366d411c63b8568a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 8 Nov 2020 23:17:06 +0100 Subject: [PATCH] Get shader color write masking going on all backends. --- Common/GPU/OpenGL/GLQueueRunner.cpp | 26 +++++++ Common/GPU/OpenGL/GLQueueRunner.h | 1 + Common/GPU/OpenGL/GLRenderManager.h | 24 +++++++ Common/GPU/ShaderWriter.cpp | 1 + Common/GPU/Vulkan/thin3d_vulkan.cpp | 3 +- GPU/Common/FragmentShaderGenerator.cpp | 94 +++++++++++++++++++++----- GPU/Common/GPUStateUtils.cpp | 62 ++++++++++++++++- GPU/Common/GPUStateUtils.h | 21 +++++- GPU/Common/ShaderCommon.h | 7 +- GPU/Common/ShaderId.cpp | 3 + GPU/Common/ShaderId.h | 1 + GPU/Common/ShaderUniforms.cpp | 4 ++ GPU/Common/ShaderUniforms.h | 14 ++-- GPU/D3D11/StateMappingD3D11.cpp | 38 ++--------- GPU/Directx9/StateMappingDX9.cpp | 24 ++----- GPU/GLES/ShaderManagerGLES.cpp | 4 ++ GPU/GLES/ShaderManagerGLES.h | 1 + GPU/GLES/StateMappingGLES.cpp | 35 +++------- GPU/GPUCommon.cpp | 4 +- GPU/Vulkan/StateMappingVulkan.cpp | 37 ++++------ assets/compat.ini | 10 ++- unittest/TestShaderGenerators.cpp | 72 +++++++++++++------- 22 files changed, 323 insertions(+), 163 deletions(-) diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index 597f92cac19e..a41771a8bda5 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -1001,6 +1001,32 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last CHECK_GL_ERROR_IF_DEBUG(); break; } + case GLRRenderCommand::UNIFORM4UI: + { + _dbg_assert_(curProgram); + int loc = c.uniform4.loc ? 
*c.uniform4.loc : -1; + if (c.uniform4.name) { + loc = curProgram->GetUniformLoc(c.uniform4.name); + } + if (loc >= 0) { + switch (c.uniform4.count) { + case 1: + glUniform1uiv(loc, 1, (GLuint *)&c.uniform4.v[0]); + break; + case 2: + glUniform2uiv(loc, 1, (GLuint *)c.uniform4.v); + break; + case 3: + glUniform3uiv(loc, 1, (GLuint *)c.uniform4.v); + break; + case 4: + glUniform4uiv(loc, 1, (GLuint *)c.uniform4.v); + break; + } + } + CHECK_GL_ERROR_IF_DEBUG(); + break; + } case GLRRenderCommand::UNIFORM4I: { _dbg_assert_(curProgram); diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index f47bfd7c40ec..07837421bea9 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -42,6 +42,7 @@ enum class GLRRenderCommand : uint8_t { BLENDCOLOR, LOGICOP, UNIFORM4I, + UNIFORM4UI, UNIFORM4F, UNIFORMMATRIX, TEXTURESAMPLER, diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 3999332e691f..eca7204de2bf 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -659,6 +659,30 @@ class GLRenderManager { curRenderStep_->commands.push_back(data); } + void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); +#ifdef _DEBUG + _dbg_assert_(curProgram_); +#endif + GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; + data.uniform4.loc = loc; + data.uniform4.count = count; + memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count); + curRenderStep_->commands.push_back(data); + } + + void SetUniformUI1(const GLint *loc, uint32_t udata) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); +#ifdef _DEBUG + _dbg_assert_(curProgram_); +#endif + GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; + data.uniform4.loc = loc; + data.uniform4.count = 1; + memcpy(data.uniform4.v, &udata, sizeof(udata)); + 
curRenderStep_->commands.push_back(data); + } + void SetUniformF(const GLint *loc, int count, const float *udata) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); #ifdef _DEBUG diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 2e05c56f799a..b793e48ae527 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -23,6 +23,7 @@ const char *hlsl_preamble_fs = "#define vec3 float3\n" "#define vec4 float4\n" "#define uvec3 uint3\n" +"#define uvec4 uint4\n" "#define ivec3 int3\n" "#define ivec4 int4\n" "#define mat4 float4x4\n" diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 4fc289023097..b5ac0fb8c155 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -1543,7 +1543,8 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; - // TODO: There are cases where this is okay, actually. + // TODO: There are cases where this is okay, actually. But requires layout transitions and stuff - + // we're not ready for this. 
_assert_(fb != curFramebuffer_); int aspect = 0; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index e38cba0b9f47..5e3f8623c11e 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -80,6 +80,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE); bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL); bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); + bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK); + + if (colorWriteMask && !compat.bitwiseOps) { + *errorString = "Color Write Mask requires bitwise ops"; + return false; + } GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); @@ -104,7 +110,13 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); bool useAdrenoBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL); - bool readFramebufferTex = replaceBlend == REPLACE_BLEND_COPY_FBO && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool readFramebuffer = replaceBlend == REPLACE_BLEND_COPY_FBO || colorWriteMask; + bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + + if (readFramebuffer && compat.shaderLanguage == HLSL_D3D9) { + *errorString = "Framebuffer read not yet supported in HLSL D3D9"; + return false; + } if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) { if (earlyFragmentTests) { @@ -188,11 +200,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, "SamplerState samp : register(s0);\n"); WRITE(p, "Texture2D tex : register(t0);\n"); - if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { - 
if (replaceBlend == REPLACE_BLEND_COPY_FBO) { - // No sampler required, we Load - WRITE(p, "Texture2D fboTex : register(t1);\n"); - } + if (readFramebufferTex) { + // No sampler required, we Load + WRITE(p, "Texture2D fboTex : register(t1);\n"); } WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr); } @@ -207,7 +217,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } if (enableColorTest) { if (compat.shaderLanguage == HLSL_D3D11) { - WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return uvec3(floor(x * 255.0f + 0.5f)); }\n"); + WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return (floor(x * 255.0f + 0.5f)); }\n"); } else { WRITE(p, "vec3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n"); } @@ -225,7 +235,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (enableFog) { WRITE(p, " float v_fogdepth: TEXCOORD1;\n"); } - if (compat.shaderLanguage == HLSL_D3D11 && ((replaceBlend == REPLACE_BLEND_COPY_FBO) || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT))) { + if (compat.shaderLanguage == HLSL_D3D11 && readFramebuffer) { WRITE(p, " vec4 pixelPos : SV_POSITION;\n"); } WRITE(p, "};\n"); @@ -286,14 +296,15 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); + if (readFramebufferTex) { + if (!compat.texelFetch) { + WRITE(p, "uniform vec2 u_fbotexSize;\n"); + } + WRITE(p, "uniform sampler2D fbotex;\n"); + } + if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { *uniformMask |= DIRTY_SHADERBLEND; - if (readFramebufferTex) { - if (!compat.texelFetch) { - WRITE(p, "uniform vec2 u_fbotexSize;\n"); - } - WRITE(p, "uniform sampler2D fbotex;\n"); - } if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) { WRITE(p, "uniform vec3 u_blendFixA;\n"); } @@ -329,6 +340,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu *uniformMask |= DIRTY_DEPAL; } + if 
(colorWriteMask) { + WRITE(p, "uniform uint u_colorWriteMask;\n"); + *uniformMask |= DIRTY_COLORWRITEMASK; + } + if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { *uniformMask |= DIRTY_STENCILREPLACEVALUE; WRITE(p, "uniform float u_stencilReplaceValue;\n"); @@ -387,6 +403,20 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } + // Provide implementations of packUnorm4x8 and unpackUnorm4x8 if not available. Only needed (and only emitted) when color write masking is active. + if (colorWriteMask && (compat.shaderLanguage == HLSL_D3D11 || (compat.shaderLanguage == GLSL_3xx && compat.glslVersionNumber < 400))) { + WRITE(p, "uint packUnorm4x8(vec4 v) {\n"); + WRITE(p, " v = clamp(v, 0.0, 1.0);\n"); + WRITE(p, " uvec4 u = uvec4(255.0 * v + 0.5);\n"); + WRITE(p, " return u.x | (u.y << 8) | (u.z << 16) | (u.w << 24);\n"); + WRITE(p, "}\n"); + + WRITE(p, "vec4 unpackUnorm4x8(uint x) {\n"); + WRITE(p, " uvec4 u = uvec4(x & 0xFFU, (x >> 8) & 0xFFU, (x >> 16) & 0xFFU, (x >> 24) & 0xFFU);\n"); + WRITE(p, " return vec4(u) * (1.0 / 255.0);\n"); + WRITE(p, "}\n"); + } + // PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one. if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) { WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n"); @@ -416,6 +446,21 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (isModeClear) { // Clear mode does not allow any fancy shading. WRITE(p, " vec4 v = v_color0;\n"); + + // Masking with clear mode is ok, I think? + if (readFramebuffer) { + if (compat.shaderLanguage == HLSL_D3D11) { + WRITE(p, " vec4 destColor = fboTex.Load(int3((int)In.pixelPos.x, (int)In.pixelPos.y, 0));\n"); + } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { + // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. 
+ WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); + } else if (!compat.texelFetch) { + WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); + } else { + WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); + } + } } else { const char *secondary = ""; // Secondary color for specular on top of texture @@ -799,19 +844,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor); } - if (replaceBlend == REPLACE_BLEND_COPY_FBO && compat.shaderLanguage != HLSL_D3D9) { - // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. - // We can just read the prev value more directly. + // Two things read from the old framebuffer - shader replacement blending and bit-level masking. + if (readFramebuffer) { if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " vec4 destColor = fboTex.Load(int3((int)In.pixelPos.x, (int)In.pixelPos.y, 0));\n"); } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { + // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); } else if (!compat.texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); } else { WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); } + } + if (replaceBlend == REPLACE_BLEND_COPY_FBO) { const char *srcFactor = nullptr; const char *dstFactor = nullptr; @@ -927,6 +975,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu return false; } + // TODO: This could support more ops using the shader blending mechanism. 
LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2); switch (replaceLogicOpType) { case LOGICOPTYPE_ONE: @@ -943,6 +992,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu return false; } + // Final color computed - apply color write mask. + // TODO: Maybe optimize to only do math on the affected channels? + // Or .. meh. + if (colorWriteMask) { + WRITE(p, " highp uint v32 = packUnorm4x8(v);\n"); + WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n"); + // Note that the mask has been flipped to the PC way - 1 means write. + WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n"); + WRITE(p, " v = unpackUnorm4x8(v32);\n"); + } + if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { const double scale = DepthSliceFactor() * 65535.0; diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 7cf550ef85de..fc599c6f3e24 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -956,8 +956,68 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS } } -// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state. +bool IsColorWriteMaskComplex(bool allowFramebufferRead) { + // Restrict to Outrun temporarily (by uglily reusing the ReinterpretFramebuffers flag) + if (!allowFramebufferRead || !PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) { + // Don't have a choice - we'll make do but it won't always be right. + return false; + } + + uint32_t colorMask = (gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24); + + for (int i = 0; i < 4; i++) { + switch (colorMask & 0xFF) { + case 0x0: + case 0xFF: + break; + default: + return true; + } + colorMask >>= 8; + } + return false; +} +// If we can we emulate the colorMask by simply toggling the full R G B A masks offered +// by modern hardware, we do that. This is 99.9% of the time. 
+// When that's not enough, we fall back on a technique similar to shader blending, +// we read from the framebuffer (or a copy of it). The fallback is gated by IsColorWriteMaskComplex() so it always agrees with the fragment shader ID. +void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) { + // Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw. + uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24)); + maskState.uniformMask = colorMask; + maskState.applyFramebufferRead = false; + for (int i = 0; i < 4; i++) { + int channelMask = colorMask & 0xFF; + switch (channelMask) { + case 0x0: + maskState.rgba[i] = false; + break; + case 0xFF: + maskState.rgba[i] = true; + break; + default: + if (IsColorWriteMaskComplex(allowFramebufferRead)) { + maskState.applyFramebufferRead = true; + maskState.rgba[i] = true; + } else { + // Use the old heuristic. + maskState.rgba[i] = channelMask >= 128; + } + } + colorMask >>= 8; + } + + // Let's not write to alpha if stencil isn't enabled. + if (IsStencilTestOutputDisabled()) { + maskState.rgba[3] = false; + } else if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { + // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. + maskState.rgba[3] = false; + } +} + +// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state. void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead) { // Blending is a bit complex to emulate. This is due to several reasons: // diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index 928570206637..943754c968bc 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -1,5 +1,6 @@ #pragma once +#include <cstdint> #include "Common/CommonTypes.h" #include "GPU/ge_constants.h" @@ -25,12 +26,18 @@ enum ReplaceAlphaType { }; enum ReplaceBlendType { - REPLACE_BLEND_NO, + REPLACE_BLEND_NO, // Blend function handled directly with blend states. + REPLACE_BLEND_STANDARD, + + // SRC part of blend function handled in-shader. 
REPLACE_BLEND_PRE_SRC, REPLACE_BLEND_PRE_SRC_2X_ALPHA, REPLACE_BLEND_2X_ALPHA, REPLACE_BLEND_2X_SRC, + + // Full blend equation runs in shader. + // We might have to make a copy of the framebuffer target to read from. REPLACE_BLEND_COPY_FBO, }; @@ -47,6 +54,9 @@ bool IsAlphaTestAgainstZero(); bool NeedsTestDiscard(); bool IsStencilTestOutputDisabled(); +// If not, we have to emulate it in the shader, similar to blend replace. +bool IsColorMaskSimple(uint32_t colorMask); + StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bufferFormat); @@ -160,6 +170,15 @@ struct GenericBlendState { void ConvertBlendState(GenericBlendState &blendState, bool allowShaderBlend); void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState); +struct GenericMaskState { + bool applyFramebufferRead; + uint32_t uniformMask; // For each bit, opposite to the PSP. + bool rgba[4]; // true = draw, false = don't draw this channel +}; + +void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead); +bool IsColorWriteMaskComplex(bool allowFramebufferRead); + struct GenericStencilFuncState { bool enabled; GEComparison testFunc; diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 4d5a41f077ab..86ffdaad337b 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -41,7 +41,7 @@ enum DebugShaderStringType { }; // Shared between the backends. Not all are necessarily used by each backend, but this lets us share -// more code than before. +// more code than before. TODO: Can probably cut the number of these down without too much slowdown. 
enum : uint64_t { DIRTY_PROJMATRIX = 1ULL << 0, DIRTY_PROJTHROUGHMATRIX = 1ULL << 1, @@ -86,12 +86,13 @@ enum : uint64_t { DIRTY_CULLRANGE = 1ULL << 34, DIRTY_DEPAL = 1ULL << 35, + DIRTY_COLORWRITEMASK = 1ULL << 36, - // space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. + // space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index f69e52390082..88ac081cd36e 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -171,6 +171,7 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat "; if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA "; if (id.Bit(FS_BIT_SHADER_DEPAL)) desc << "Depal "; + if (id.Bit(FS_BIT_COLOR_WRITEMASK)) desc << "WriteMask "; if (id.Bit(FS_BIT_SHADER_TEX_CLAMP)) { desc << "TClamp"; if (id.Bit(FS_BIT_CLAMP_S)) desc << "S"; @@ -239,6 +240,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool useShaderDepal = gstate_c.useShaderDepal; + bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead); // Note how we here recompute some of the work already done in state mapping. // Not ideal! At least we share the code. 
@@ -309,6 +311,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { id.SetBit(FS_BIT_FLATSHADE, doFlatShading); id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal); + id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask); if (g_Config.bVendorBugChecksEnabled) { if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) { diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index f28dd588ba97..5ae71487ad5b 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -92,6 +92,7 @@ enum FShaderBit : uint8_t { FS_BIT_BGRA_TEXTURE = 47, FS_BIT_TEST_DISCARD_TO_ZERO = 48, FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, + FS_BIT_COLOR_WRITEMASK = 50, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 46f6a9385544..bf2b540db5b8 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -203,6 +203,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA()); } + if (dirtyUniforms & DIRTY_COLORWRITEMASK) { + ub->colorWriteMask = ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)); + } + // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index a2ecf4a71bfc..0959be328bbf 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -10,7 +10,7 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | - DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | + DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | 
DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE | DIRTY_DEPAL, DIRTY_LIGHT_UNIFORMS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 | @@ -30,10 +30,10 @@ struct UB_VS_FS_Base { float depthRange[4]; float fogCoef[2]; float stencil; float pad0; float matAmbient[4]; - uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. - int pad2; int pad3; float cullRangeMin[4]; float cullRangeMax[4]; + uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. + uint32_t colorWriteMask; int pad3; // Fragment data float fogColor[4]; float texEnvColor[4]; @@ -56,17 +56,17 @@ R"( mat4 u_proj; vec2 u_fogcoef; float u_stencilReplaceValue; vec4 u_matambientalpha; + vec4 u_cullRangeMin; + vec4 u_cullRangeMax; uint u_spline_counts; uint u_depal_mask_shift_off_fmt; - int u_pad2; + uint u_colorWriteMask; int u_pad3; - vec4 u_cullRangeMin; - vec4 u_cullRangeMax; vec3 u_fogcolor; vec3 u_texenv; ivec4 u_alphacolorref; ivec4 u_alphacolormask; - vec3 u_blendFixA; + vec3 u_blendFixA; uint colorWriteMask; vec3 u_blendFixB; vec4 u_texclamp; vec2 u_texclampoff; diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 6329495c8c5c..a6ca5025da4a 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -158,6 +158,10 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { // Set blend - unless we need to do it in the shader. GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowFramebufferRead); + + GenericMaskState maskState; + ConvertMaskState(maskState, gstate_c.allowFramebufferRead); + - if (blendState.applyFramebufferRead) { + if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) { if (ApplyFramebufferRead(&fboTexNeedsBind_)) { // The shader takes over the responsibility for blending, so recompute. 
@@ -170,6 +174,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); } else if (blendState.resetFramebufferRead) { ResetFramebufferRead(); + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); } if (blendState.enabled) { @@ -204,38 +209,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { } } - // PSP color/alpha mask is per bit but we can only support per byte. - // But let's do that, at least. And let's try a threshold. - bool rmask = (gstate.pmskc & 0xFF) < 128; - bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; - bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; - bool amask = (gstate.pmska & 0xFF) < 128; - -#ifndef MOBILE_DEVICE - u8 abits = (gstate.pmska >> 0) & 0xFF; - u8 rbits = (gstate.pmskc >> 0) & 0xFF; - u8 gbits = (gstate.pmskc >> 8) & 0xFF; - u8 bbits = (gstate.pmskc >> 16) & 0xFF; - if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) { - WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits); - } - if (abits != 0 && abits != 0xFF) { - // The stencil part of the mask is supported. - WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits); - } -#endif - - // Let's not write to alpha if stencil isn't enabled. - if (IsStencilTestOutputDisabled()) { - amask = false; - } else { - // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. - if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { - amask = false; - } - } - - keys_.blend.colorWriteMask = (rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0); + keys_.blend.colorWriteMask = (maskState.rgba[0] ? 1 : 0) | (maskState.rgba[1] ? 2 : 0) | (maskState.rgba[2] ? 4 : 0) | (maskState.rgba[3] ? 
8 : 0); } if (!device1_) { diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 99f8860f28dc..dfb5c5a754ee 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -129,7 +129,10 @@ void DrawEngineDX9::ApplyDrawState(int prim) { GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowFramebufferRead); - if (blendState.applyFramebufferRead) { + GenericMaskState maskState; + ConvertMaskState(maskState, gstate_c.allowFramebufferRead); + + if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) { if (ApplyFramebufferRead(&fboTexNeedsBind_)) { // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -160,24 +163,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) { dxstate.blend.disable(); } - // PSP color/alpha mask is per bit but we can only support per byte. - // But let's do that, at least. And let's try a threshold. - bool rmask = (gstate.pmskc & 0xFF) < 128; - bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; - bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; - bool amask = (gstate.pmska & 0xFF) < 128; - - // Let's not write to alpha if stencil isn't enabled. - if (IsStencilTestOutputDisabled()) { - amask = false; - } else { - // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. 
- if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { - amask = false; - } - } - - dxstate.colorMask.set(rmask, gmask, bmask, amask); + dxstate.colorMask.set(maskState.rgba[0], maskState.rgba[1], maskState.rgba[2], maskState.rgba[3]); } } diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index a8d23c88ba3d..159f0261a7b4 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -102,6 +102,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_fogcoef, "u_fogcoef" }); queries.push_back({ &u_alphacolorref, "u_alphacolorref" }); queries.push_back({ &u_alphacolormask, "u_alphacolormask" }); + queries.push_back({ &u_colorWriteMask, "u_colorWriteMask" }); queries.push_back({ &u_stencilReplaceValue, "u_stencilReplaceValue" }); queries.push_back({ &u_testtex, "testtex" }); @@ -377,6 +378,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu if (dirty & DIRTY_ALPHACOLORMASK) { SetColorUniform3iAlpha(render_, &u_alphacolormask, gstate.colortestmask, gstate.getAlphaTestMask()); } + if (dirty & DIRTY_COLORWRITEMASK) { + render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF))); + } if (dirty & DIRTY_FOGCOLOR) { SetColorUniform3(render_, &u_fogcolor, gstate.fogcolor); } diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 71e554163112..f93c9a4d24e5 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -80,6 +80,7 @@ class LinkedShader { // Fragment processing inputs int u_alphacolorref; int u_alphacolormask; + int u_colorWriteMask; int u_testtex; int u_fogcolor; int u_fogcoef; diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 43f9629f7e32..cc89eda9b7d3 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -24,6 +24,7 @@ #include "Common/Profiler/Profiler.h" #include 
"Common/GPU/OpenGL/GLDebugLog.h" #include "Common/GPU/OpenGL/GLRenderManager.h" +#include "Common/Data/Convert/SmallDataConvert.h" #include "GPU/Math3D.h" #include "GPU/GPUState.h" @@ -140,18 +141,6 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // Start profiling here to skip SetTexture which is already accounted for PROFILE_THIS_SCOPE("applydrawstate"); - // amask is needed for both stencil and blend state so we keep it outside for now - bool amask = (gstate.pmska & 0xFF) < 128; - // Let's not write to alpha if stencil isn't enabled. - if (IsStencilTestOutputDisabled()) { - amask = false; - } else { - // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. - if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { - amask = false; - } - } - bool useBufferedRendering = framebufferManager_->UseBufferedRendering(); if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) { @@ -169,7 +158,10 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowFramebufferRead); - if (blendState.applyFramebufferRead) { + GenericMaskState maskState; + ConvertMaskState(maskState, gstate_c.allowFramebufferRead); + + if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) { if (ApplyFramebufferRead(&fboTexNeedsBind_)) { // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -177,7 +169,6 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // We copy the framebuffer here, as doing so will wipe any blend state if we do it later. if (fboTexNeedsBind_) { // Note that this is positions, not UVs, that we need the copy from. - // TODO: If the device doesn't support blit, this will corrupt the currently applied texture. 
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. renderManager->SetTextureSampler(1, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_LINEAR, GL_LINEAR, 0.0f); @@ -205,23 +196,13 @@ void DrawEngineGLES::ApplyDrawState(int prim) { } if (blendState.useBlendColor) { uint32_t color = blendState.blendColor; - const float col[4] = { - (float)((color & 0xFF) >> 0) * (1.0f / 255.0f), - (float)((color & 0xFF00) >> 8) * (1.0f / 255.0f), - (float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f), - (float)((color & 0xFF000000) >> 24) * (1.0f / 255.0f), - }; + float col[4]; + Uint8x4ToFloat4(col, color); renderManager->SetBlendFactor(col); } } - // PSP color/alpha mask is per bit but we can only support per byte. - // But let's do that, at least. And let's try a threshold. - bool rmask = (gstate.pmskc & 0xFF) < 128; - bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; - bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; - - int mask = (int)rmask | ((int)gmask << 1) | ((int)bmask << 2) | ((int)amask << 3); + int mask = (int)maskState.rgba[0] | ((int)maskState.rgba[1] << 1) | ((int)maskState.rgba[2] << 2) | ((int)maskState.rgba[3] << 3); if (blendState.enabled) { renderManager->SetBlendAndMask(mask, blendState.enabled, glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor], diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index b32e26f78516..a86f7b99634b 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -120,8 +120,8 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE }, - { GE_CMD_MASKRGB, 
FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE }, - { GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE }, + { GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_COLORWRITEMASK }, + { GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK }, { GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE }, { GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE }, diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 4d7c5fec1388..a636531e53db 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -165,7 +165,10 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowFramebufferRead); - if (blendState.applyFramebufferRead) { + GenericMaskState maskState; + ConvertMaskState(maskState, gstate_c.allowFramebufferRead); + + if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) { if (ApplyFramebufferRead(&fboTexNeedsBind_)) { // The shader takes over the responsibility for blending, so recompute. 
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -178,6 +181,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); } else if (blendState.resetFramebufferRead) { ResetFramebufferRead(); + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); } if (blendState.enabled) { @@ -206,24 +210,11 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag dynState.useBlendColor = false; } - // PSP color/alpha mask is per bit but we can only support per byte. - // But let's do that, at least. And let's try a threshold. - bool rmask = (gstate.pmskc & 0xFF) < 128; - bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; - bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; - bool amask = (gstate.pmska & 0xFF) < 128; - - // Let's not write to alpha if stencil isn't enabled. - if (IsStencilTestOutputDisabled()) { - amask = false; - } else { - // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. - if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { - amask = false; - } - } - - key.colorWriteMask = (rmask ? VK_COLOR_COMPONENT_R_BIT : 0) | (gmask ? VK_COLOR_COMPONENT_G_BIT : 0) | (bmask ? VK_COLOR_COMPONENT_B_BIT : 0) | (amask ? VK_COLOR_COMPONENT_A_BIT : 0); + key.colorWriteMask = + (maskState.rgba[0] ? VK_COLOR_COMPONENT_R_BIT : 0) | + (maskState.rgba[1] ? VK_COLOR_COMPONENT_G_BIT : 0) | + (maskState.rgba[2] ? VK_COLOR_COMPONENT_B_BIT : 0) | + (maskState.rgba[3] ? VK_COLOR_COMPONENT_A_BIT : 0); // Workaround proposed in #10421, for bug where the color write mask is not applied correctly on Adreno. 
if ((gstate.pmskc & 0x00FFFFFF) == 0x00FFFFFF && g_Config.bVendorBugChecksEnabled && draw_->GetBugs().Has(Draw::Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST)) { @@ -377,20 +368,16 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag } void DrawEngineVulkan::BindShaderBlendTex() { - // At this point, we know if the vertices are full alpha or not. - // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? + // TODO: At this point, we know if the vertices are full alpha or not. + // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { - // TODO: Test texture? if (fboTexNeedsBind_) { - // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); - // If we are rendering at a higher resolution, linear is probably best for the dest color. boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); fboTexBound_ = true; fboTexNeedsBind_ = false; } } - } void DrawEngineVulkan::ApplyDrawStateLate(VulkanRenderManager *renderManager, bool applyStencilRef, uint8_t stencilRef, bool useBlendConstant) { diff --git a/assets/compat.ini b/assets/compat.ini index 6cd40f5bc3cb..2273beb0f376 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -796,9 +796,15 @@ NPUZ00043 = true NPEZ00198 = true # This setting will go away in the near future, hopefully we can enable it -# for all games. +# for all or most games. [ReinterpretFramebuffers] -# Outrun - issue #11358 +# Outrun 2006: Coast to Coast - issue #11358 ULES00262 = true ULUS10064 = true ULKS46087 = true + +# Colin McRae's DiRT 2? 
+# ULUS10471 = true +# ULJM05533 = true +# NPJH50006 = true + diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 46fa21730578..6336b97662f2 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -181,7 +181,7 @@ bool TestReinterpretShaders() { failed = true; return false; } else { - printf("===\n%s\n===\n", buffer); + //printf("===\n%s\n===\n", buffer); } } } @@ -215,25 +215,16 @@ bool TestReinterpretShaders() { return !failed; } +const ShaderLanguage languages[] = { + ShaderLanguage::HLSL_D3D9, + ShaderLanguage::HLSL_D3D11, + ShaderLanguage::GLSL_VULKAN, + ShaderLanguage::GLSL_1xx, + ShaderLanguage::GLSL_3xx, +}; +const int numLanguages = ARRAY_SIZE(languages); -bool TestShaderGenerators() { - LoadD3D11(); - init_glslang(); - LoadD3DCompilerDynamic(); - - if (!TestReinterpretShaders()) { - return false; - } - - ShaderLanguage languages[] = { - ShaderLanguage::HLSL_D3D9, - ShaderLanguage::HLSL_D3D11, - ShaderLanguage::GLSL_VULKAN, - ShaderLanguage::GLSL_1xx, - ShaderLanguage::GLSL_3xx, - }; - const int numLanguages = ARRAY_SIZE(languages); - +bool TestVertexShaders() { char *buffer[numLanguages]; for (int i = 0; i < numLanguages; i++) { @@ -290,8 +281,21 @@ bool TestShaderGenerators() { printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages); - successes = 0; - count = 200; + for (int i = 0; i < numLanguages; i++) { + delete[] buffer[i]; + } + return true; +} + +bool TestFragmentShaders() { + char *buffer[numLanguages]; + + for (int i = 0; i < numLanguages; i++) { + buffer[i] = new char[65536]; + } + GMRng rng; + int successes = 0; + int count = 300; // Generate a bunch of random fragment shader IDs, try to generate shader source. // Then compile it and check that it's ok. 
@@ -337,15 +341,31 @@ bool TestShaderGenerators() { printf("%d/%d fragment shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages); - successes = 0; - count = 200; + for (int i = 0; i < numLanguages; i++) { + delete[] buffer[i]; + } + return true; +} +bool TestShaderGenerators() { + LoadD3D11(); + init_glslang(); + LoadD3DCompilerDynamic(); - _CrtCheckMemory(); + if (!TestReinterpretShaders()) { + return false; + } - for (int i = 0; i < numLanguages; i++) { - delete[] buffer[i]; + if (!TestFragmentShaders()) { + return false; } + if (!TestVertexShaders()) { + return false; + } + + _CrtCheckMemory(); + + return true; }