From 44ba31fbc67198086c3a65cb3b6ef88872ee5c92 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 16 Sep 2018 23:57:20 -0700 Subject: [PATCH] Vulkan: Implement verex range culling. Also D3D11, since they are very similar. --- GPU/Common/ShaderUniforms.cpp | 38 ++++++++++++++++++++++ GPU/Common/ShaderUniforms.h | 8 ++++- GPU/Directx9/VertexShaderGeneratorDX9.cpp | 2 +- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 24 +++++++++++--- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index e2dee45c2826..5489df59b6e8 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -192,6 +192,44 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->depthRange[3] = viewZInvScale; } + if (dirtyUniforms & DIRTY_CULLRANGE) { + // Account for the projection viewport adjustment when viewport is too large. + auto reverseViewportX = [](float x) { + float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale()); + return pspViewport * (1.0f / gstate_c.vpWidthScale); + }; + auto reverseViewportY = [flipViewport](float y) { + float heightScale = gstate_c.vpHeightScale; + if (flipViewport) { + // For D3D11. + heightScale = -heightScale; + } + float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale()); + return pspViewport * (1.0f / gstate_c.vpHeightScale); + }; + auto reverseViewportZ = [](float z) { + float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale()); + // Differs from GLES: depth is 0 to 1, not -1 to 1. + return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale) * 0.5f + 0.5f; + }; + auto sortPair = [](float a, float b) { + return a > b ? std::make_pair(b, a) : std::make_pair(a, b); + }; + + // The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z. + // Any vertex outside this range (unless depth clamp enabled) is discarded. + auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f)); + auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f)); + auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f)); + // Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard". + float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f; + + float minValues[4]{ x.first, y.first, z.first, clampEnable }; + memcpy(ub->cullRangeMin, minValues, sizeof(ub->cullRangeMin)); + float maxValues[4]{ x.second, y.second, z.second, NAN }; + memcpy(ub->cullRangeMax, maxValues, sizeof(ub->cullRangeMax)); + } + if (dirtyUniforms & DIRTY_BEZIERSPLINE) { ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v); } diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index dd1e659604b4..5c5c29f16904 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -18,7 +18,7 @@ enum : uint64_t { }; // TODO: Split into two structs, one for software transform and one for hardware transform, to save space. -// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment). +// Currently 512 bytes. Probably can't get to 256 (nVidia's UBO alignment). // Every line here is a 4-float. struct UB_VS_FS_Base { float proj[16]; @@ -32,6 +32,8 @@ struct UB_VS_FS_Base { float matAmbient[4]; uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. int pad2; int pad3; + float cullRangeMin[4]; + float cullRangeMax[4]; // Fragment data float fogColor[4]; float texEnvColor[4]; @@ -58,6 +60,8 @@ R"( mat4 proj_mtx; uint depal_mask_shift_off_fmt; int pad2; int pad3; + vec4 cullRangeMin; + vec4 cullRangeMax; vec3 fogcolor; vec3 texenv; ivec4 alphacolorref; @@ -84,6 +88,8 @@ R"( float4x4 u_proj; uint u_depal_mask_shift_off_fmt; int pad2; int pad3; + float4 u_cullRangeMin; + float4 u_cullRangeMax; float3 u_fogcolor; float3 u_texenv; uint4 u_alphacolorref; diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index 8cebd2dfdc15..1cd0f383c4fa 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -815,7 +815,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage } } - if (lang == HLSL_DX9 && !isModeThrough) { + if (!isModeThrough) { WRITE(p, " float3 projPos = outPos.xyz / outPos.w;\n"); // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. WRITE(p, " if (u_cullRangeMin.w <= 0.0f || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index e901c4390043..c180631e9e27 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -317,13 +317,13 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_through_mtx * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * vec4(position.xyz, 1.0);\n"); } } } else { @@ -472,9 +472,9 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * viewPos);\n"); } else { - WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n"); + WRITE(p, " vec4 outPos = base.proj_mtx * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. @@ -694,6 +694,20 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) { if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n"); } + + if (!isModeThrough) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); + // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. + WRITE(p, " if (base.cullRangeMin.w <= 0.0f || (projPos.z >= base.cullRangeMin.z && projPos.z <= base.cullRangeMax.z)) {\n"); + const char *outMin = "projPos.x < base.cullRangeMin.x || projPos.y < base.cullRangeMin.y || projPos.z < base.cullRangeMin.z"; + const char *outMax = "projPos.x > base.cullRangeMax.x || projPos.y > base.cullRangeMax.y || projPos.z > base.cullRangeMax.z"; + WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " outPos.w = base.cullRangeMax.w;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); + } + WRITE(p, " gl_Position = outPos;\n"); + WRITE(p, "}\n"); return true; }