diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index 2fe56bfac61c..a2d66b9a8690 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -793,6 +793,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last bool blendEnabled = false; bool cullEnabled = false; bool ditherEnabled = false; + bool depthClampEnabled = false; #ifndef USING_GLES2 int logicOp = -1; bool logicEnabled = false; @@ -1283,6 +1284,17 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last glDisable(GL_DITHER); ditherEnabled = false; } +#ifndef USING_GLES2 + if (c.raster.depthClampEnable) { + if (!depthClampEnabled) { + glEnable(GL_DEPTH_CLAMP); + depthClampEnabled = true; + } + } else if (!c.raster.depthClampEnable && depthClampEnabled) { + glDisable(GL_DEPTH_CLAMP); + depthClampEnabled = false; + } +#endif CHECK_GL_ERROR_IF_DEBUG(); break; default: @@ -1322,6 +1334,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last if (cullEnabled) glDisable(GL_CULL_FACE); #ifndef USING_GLES2 + if (depthClampEnabled) + glDisable(GL_DEPTH_CLAMP); if (!gl_extensions.IsGLES && logicEnabled) { glDisable(GL_COLOR_LOGIC_OP); } diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index 07837421bea9..8dec75394981 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -196,6 +196,7 @@ struct GLRRenderData { GLenum frontFace; GLenum cullFace; GLboolean ditherEnable; + GLboolean depthClampEnable; } raster; }; }; diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 9edd5af35cf8..b4c3378739aa 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -813,13 +813,14 @@ class GLRenderManager { curRenderStep_->commands.push_back(data); } - void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable) { + void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); GLRRenderData data{ GLRRenderCommand::RASTER }; data.raster.cullEnable = cullEnable; data.raster.frontFace = frontFace; data.raster.cullFace = cullFace; data.raster.ditherEnable = ditherEnable; + data.raster.depthClampEnable = depthClamp; curRenderStep_->commands.push_back(data); } diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 161c04eecfaf..d26f1ab8af8f 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -188,7 +188,7 @@ class OpenGLDepthStencilState : public DepthStencilState { class OpenGLRasterState : public RasterState { public: void Apply(GLRenderManager *render) { - render->SetRaster(cullEnable, frontFace, cullMode, false); + render->SetRaster(cullEnable, frontFace, cullMode, GL_FALSE, GL_FALSE); } GLboolean cullEnable; @@ -533,6 +533,12 @@ OpenGLContext::OpenGLContext() { } caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object; caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; + caps_.depthClampSupported = gl_extensions.ARB_depth_clamp; + + // Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader): + // This will induce a performance penalty on many architectures though so a blanket enable of this + // is probably not a good idea. + // https://stackoverflow.com/questions/5960757/how-to-emulate-gl-depth-clamp-nv switch (gl_extensions.gpuVendor) { case GPU_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break; diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 11c598a7f09e..ba8f520e1b9f 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -25,6 +25,7 @@ #include "Common/Log.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" +#include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/DepalettizeShaderCommon.h" #define WRITE p+=sprintf @@ -71,6 +72,12 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang WRITE(p, "out vec4 fragColor0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); + + if (pixelFormat == GE_FORMAT_DEPTH16) { + DepthScaleFactors factors = GetDepthScaleFactors(); + WRITE(p, "const float z_scale = %f;\n", factors.scale); + WRITE(p, "const float z_offset = %f;\n", factors.offset); + } } if (language == HLSL_D3D11) { diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 9cd487ae5ae2..7eae61caa690 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -133,7 +133,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int // We must bind the program after starting the render pass, and set the color mask after clearing. render_->SetScissor({ 0, 0, vfb->renderWidth, vfb->renderHeight }); render_->SetDepth(false, false, GL_ALWAYS); - render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE); + render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE); render_->BindProgram(depthDownloadProgram_); if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) { diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 5870c899ef5f..f28b4f7016f5 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -203,7 +203,7 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float // We always want a plain state here, well, except for when it's used by the stencil stuff... render_->SetDepth(false, false, GL_ALWAYS); - render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE); + render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE); if (!(flags & DRAWTEX_KEEP_STENCIL_ALPHA)) { render_->SetNoBlendAndMask(0xF); render_->SetStencilDisabled(); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index d60e1b5592fa..18b11d57f5f2 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -221,6 +221,14 @@ void GPU_GLES::CheckGPUFeatures() { if (gl_extensions.ARB_texture_float || gl_extensions.OES_texture_float) features |= GPU_SUPPORTS_TEXTURE_FLOAT; + if (draw_->GetDeviceCaps().depthClampSupported) { + features |= GPU_SUPPORTS_DEPTH_CLAMP | GPU_SUPPORTS_ACCURATE_DEPTH; + // Our implementation of depth texturing needs simple Z range, so can't + // use the extension hacks (yet). + if (gl_extensions.GLES3) + features |= GPU_SUPPORTS_DEPTH_TEXTURE; + } + // If we already have a 16-bit depth buffer, we don't need to round. bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8; if (prefer24) { diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 992695e4a538..404dc506ac29 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -230,7 +230,23 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GLenum cullMode = cullingMode[gstate.getCullMode() ^ !useBufferedRendering]; cullEnable = !gstate.isModeClear() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled(); - renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither); + + bool depthClampEnable = false; + if (gstate.isModeClear() || gstate.isModeThrough()) { + // TODO: Might happen in clear mode if not through... + depthClampEnable = false; + } else { + if (gstate.getDepthRangeMin() == 0 || gstate.getDepthRangeMax() == 65535) { + // TODO: Still has a bug where we clamp to depth range if one is not the full range. + // But the alternate is not clamping in either direction... + depthClampEnable = gstate.isDepthClampEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP); + } else { + // We just want to clip in this case, the clamp would be clipped anyway. + depthClampEnable = false; + } + } + + renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither, depthClampEnable); } if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) { diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index df9cd0619337..480690b9301c 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -191,7 +191,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUplo render_->SetDepth(false, false, GL_ALWAYS); render_->Clear(0, 0, 0, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, 0x8, 0, 0, 0, 0); render_->SetStencilFunc(GL_TRUE, GL_ALWAYS, 0xFF, 0xFF); - render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE); + render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE); render_->BindProgram(stencilUploadProgram_); render_->SetNoBlendAndMask(0x8); diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 783ea75c4007..1dd35b39c0fc 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -341,13 +341,15 @@ class TextureShaderApplier { }; void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) { - DepalShader *depal = nullptr; + DepalShader *depalShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; bool need_depalettize = IsClutFormat(texFormat); - bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)); + bool depth = channel == NOTIFY_FB_DEPTH; + bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)) && !depth; if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) { useShaderDepal = false; + depth = false; // Can't support this } if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { @@ -375,10 +377,10 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, return; } + depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); gstate_c.SetUseShaderDepal(false); - depal = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat); } - if (depal) { + if (depalShader) { shaderManager_->DirtyLastShader(); const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); @@ -388,11 +390,12 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, render_->SetScissor(GLRect2D{ 0, 0, (int)framebuffer->renderWidth, (int)framebuffer->renderHeight }); render_->SetViewport(GLRViewport{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f }); - TextureShaderApplier shaderApply(depal, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight); + TextureShaderApplier shaderApply(depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight); shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset); shaderApply.Use(render_, drawEngine_, shadeInputLayout_); - framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF); + draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); + render_->BindTexture(TEX_SLOT_CLUT, clutTexture); render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f); diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index e52df542be0b..7bd78af5ae04 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -220,7 +220,7 @@ void GPU_Vulkan::CheckGPUFeatures() { } // Might enable this later - in the first round we are mostly looking at depth/stencil/discard. - // if (g_Config.bDisableVendorBugChecks) + // if (!g_Config.bEnableVendorBugChecks) // features |= GPU_SUPPORTS_ACCURATE_DEPTH; // Mandatory features on Vulkan, which may be checked in "centralized" code