diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index c26185885435..1ef4ba51e4ef 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -87,14 +87,15 @@ enum : uint64_t { DIRTY_BEZIERSPLINE = 1ULL << 32, DIRTY_TEXCLAMP = 1ULL << 33, + DIRTY_CULLRANGE = 1ULL << 34, - DIRTY_DEPAL = 1ULL << 34, + DIRTY_DEPAL = 1ULL << 35, - // space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. + // space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 484819332e6b..a091d1a6a39f 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -116,6 +116,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, else numBones = 0; queries.push_back({ &u_depthRange, "u_depthRange" }); + queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" }); + queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" }); #ifdef USE_BONE_ARRAY queries.push_back({ &u_bone, "u_bone" }); @@ -481,6 +483,43 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; SetFloatUniform4(render_, &u_depthRange, data); } + if (dirty & DIRTY_CULLRANGE) { + // Account for the projection viewport adjustment when viewport is too large. + auto reverseViewportX = [](float x) { + float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale()); + return (pspViewport - gstate_c.vpXOffset) * (1.0f / gstate_c.vpWidthScale); + }; + auto reverseViewportY = [](float y) { + float yOffset = gstate_c.vpYOffset; + if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { + // GL upside down is a pain as usual. + // TODO: Is this right? 
+ yOffset = -yOffset; + } + float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale()); + return (pspViewport - yOffset) * (1.0f / gstate_c.vpHeightScale); + }; + auto reverseViewportZ = [](float z) { + float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale()); + return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale); + }; + auto sortPair = [](float a, float b) { + return a > b ? std::make_pair(b, a) : std::make_pair(a, b); + }; + + // The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z. + // Any vertex outside this range (unless depth clamp enabled) is discarded. + auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f)); + auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f)); + auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f)); + // Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard". + float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f; + + float minValues[4]{ x.first, y.first, z.first, clampEnable }; + SetFloatUniform4(render_, &u_cullRangeMin, minValues); + float maxValues[4]{ x.second, y.second, z.second, NAN }; + SetFloatUniform4(render_, &u_cullRangeMax, maxValues); + } if (dirty & DIRTY_STENCILREPLACEVALUE) { float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index fdcd2ce98b0d..5dbfe43b4e25 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -71,6 +71,8 @@ class LinkedShader { int u_texmtx; int u_world; int u_depthRange; // x,y = viewport xscale/xcenter. z,w=clipping minz/maxz (?) 
+ int u_cullRangeMin; + int u_cullRangeMax; #ifdef USE_BONE_ARRAY int u_bone; // array, size is numBones diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index c3a6427d3b7d..bc6291a73c1e 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -335,6 +335,12 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, *uniformMask |= DIRTY_DEPTHRANGE; } + if (!isModeThrough) { + WRITE(p, "uniform highp vec4 u_cullRangeMin;\n"); + WRITE(p, "uniform highp vec4 u_cullRangeMax;\n"); + *uniformMask |= DIRTY_CULLRANGE; + } + WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, varying); if (lmode) { WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, varying); @@ -472,13 +478,13 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, WRITE(p, " v_fogdepth = position.w;\n"); } if (isModeThrough) { - WRITE(p, " gl_Position = u_proj_through * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj_through * vec4(position.xyz, 1.0);\n"); } else { // The viewport is used in this case, so need to compensate for that. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n"); } else { - WRITE(p, " gl_Position = u_proj * vec4(position.xyz, 1.0);\n"); + WRITE(p, " vec4 outPos = u_proj * vec4(position.xyz, 1.0);\n"); } } } else { @@ -671,9 +677,9 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, // Final view and projection transforms. 
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, " gl_Position = depthRoundZVP(u_proj * viewPos);\n"); + WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * viewPos);\n"); } else { - WRITE(p, " gl_Position = u_proj * viewPos;\n"); + WRITE(p, " vec4 outPos = u_proj * viewPos;\n"); } // TODO: Declare variables for dots for shade mapping if needed. @@ -898,5 +904,19 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, if (enableFog) WRITE(p, " v_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n"); } + + if (!isModeThrough) { + WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); + // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. The literal below has no 'f' suffix on purpose: GLSL ES 1.00 does not accept suffixed float constants. + WRITE(p, " if (u_cullRangeMin.w <= 0.0 || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z"; + WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " outPos.w = u_cullRangeMax.w;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); + } + WRITE(p, " gl_Position = outPos;\n"); + WRITE(p, "}\n"); } diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fed71166aed3..ad9d0458cb05 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -188,13 +188,13 @@ const CommonCommandTableEntry commonCommandTable[] = { // Viewport. 
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE }, { GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, - { GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE }, + { GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | 
DIRTY_VIEWPORTSCISSOR_STATE }, + { GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE }, // Z clip { GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE },