Skip to content

Commit

Permalink
Merge pull request #14833 from unknownbrackets/guardband
Browse files Browse the repository at this point in the history
Handle guardband clip/cull better for hardware backends
  • Loading branch information
hrydgard authored Oct 19, 2021
2 parents 236d029 + 275bacc commit 16bf519
Show file tree
Hide file tree
Showing 25 changed files with 184 additions and 107 deletions.
5 changes: 4 additions & 1 deletion Common/GPU/D3D11/thin3d_d3d11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
// Seems like a fair approximation...
caps_.dualSourceBlend = featureLevel_ >= D3D_FEATURE_LEVEL_10_0;
caps_.depthClampSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0;
// SV_ClipDistance# seems to be 10+.
caps_.clipDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0;
caps_.cullDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0;

caps_.depthRangeMinusOneToOne = false;
caps_.framebufferBlitSupported = false;
Expand Down Expand Up @@ -1345,7 +1348,7 @@ void D3D11DrawContext::BindSamplerStates(int start, int count, SamplerState **st
_assert_(start + count <= ARRAY_SIZE(samplers));
for (int i = 0; i < count; i++) {
D3D11SamplerState *samp = (D3D11SamplerState *)states[i];
samplers[i] = samp->ss;
samplers[i] = samp ? samp->ss : nullptr;
}
context_->PSSetSamplers(start, count, samplers);
}
Expand Down
3 changes: 2 additions & 1 deletion Common/GPU/D3D9/thin3d_d3d9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,8 @@ class D3D9Context : public DrawContext {
_assert_(start + count <= MAX_BOUND_TEXTURES);
for (int i = 0; i < count; ++i) {
D3D9SamplerState *s = static_cast<D3D9SamplerState *>(states[i]);
s->Apply(device_, start + i);
if (s)
s->Apply(device_, start + i);
}
}
void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override {
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/OpenGL/GLFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ void CheckGLExtensions() {
gl_extensions.OES_texture_float = g_set_gl_extensions.count("GL_OES_texture_float") != 0;
gl_extensions.EXT_buffer_storage = g_set_gl_extensions.count("GL_EXT_buffer_storage") != 0;
gl_extensions.EXT_clip_cull_distance = g_set_gl_extensions.count("GL_EXT_clip_cull_distance") != 0;
gl_extensions.APPLE_clip_distance = g_set_gl_extensions.count("GL_APPLE_clip_distance") != 0;

#if defined(__ANDROID__)
// On Android, incredibly, this is not consistently non-zero! It does seem to have the same value though.
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/OpenGL/GLFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ struct GLExtensions {
// ARM
bool ARM_shader_framebuffer_fetch;

// APPLE
bool APPLE_clip_distance;

// EGL
bool EGL_NV_system_time;
bool EGL_NV_coverage_sample;
Expand Down
17 changes: 17 additions & 0 deletions Common/GPU/OpenGL/GLQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
#include "GLRenderManager.h"
#include "DataFormatGL.h"

// Make sure GL_CLIP_DISTANCE0 is always defined, whatever the GL headers provide.
// The EXT token has the same value, so alias it when present; otherwise use the raw enum value.
#if !defined(GL_CLIP_DISTANCE0)
#if defined(GL_CLIP_DISTANCE0_EXT)
#define GL_CLIP_DISTANCE0 GL_CLIP_DISTANCE0_EXT
#else
#define GL_CLIP_DISTANCE0 0x3000
#endif
#endif

static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16;

#if PPSSPP_PLATFORM(IOS)
Expand Down Expand Up @@ -798,6 +805,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
int logicOp = -1;
bool logicEnabled = false;
#endif
bool clipDistance0Enabled = false;
GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1;

Expand Down Expand Up @@ -1106,6 +1114,13 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
{
if (curProgram != c.program.program) {
glUseProgram(c.program.program->program);
if (c.program.program->use_clip_distance0 != clipDistance0Enabled) {
if (c.program.program->use_clip_distance0)
glEnable(GL_CLIP_DISTANCE0);
else
glDisable(GL_CLIP_DISTANCE0);
clipDistance0Enabled = c.program.program->use_clip_distance0;
}
curProgram = c.program.program;
}
CHECK_GL_ERROR_IF_DEBUG();
Expand Down Expand Up @@ -1340,6 +1355,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glDisable(GL_COLOR_LOGIC_OP);
}
#endif
if (clipDistance0Enabled)
glDisable(GL_CLIP_DISTANCE0);
if ((colorMask & 15) != 15)
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
CHECK_GL_ERROR_IF_DEBUG();
Expand Down
4 changes: 3 additions & 1 deletion Common/GPU/OpenGL/GLRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class GLRProgram {
std::vector<Semantic> semantics_;
std::vector<UniformLocQuery> queries_;
std::vector<Initializer> initialize_;
bool use_clip_distance0 = false;

struct UniformInfo {
int loc_;
Expand Down Expand Up @@ -422,13 +423,14 @@ class GLRenderManager {
// not be an active render pass.
GLRProgram *CreateProgram(
std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries,
std::vector<GLRProgram::Initializer> initalizers, bool supportDualSource) {
std::vector<GLRProgram::Initializer> initalizers, bool supportDualSource, bool useClipDistance0) {
GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
step.create_program.program->queries_ = queries;
step.create_program.program->initialize_ = initalizers;
step.create_program.program->use_clip_distance0 = useClipDistance0;
step.create_program.support_dual_source = supportDualSource;
_assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders");
for (size_t i = 0; i < shaders.size(); i++) {
Expand Down
9 changes: 8 additions & 1 deletion Common/GPU/OpenGL/thin3d_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,13 @@ OpenGLContext::OpenGLContext() {
caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object;
caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;
if (gl_extensions.IsGLES) {
caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance;
caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance;
} else {
caps_.clipDistanceSupported = gl_extensions.VersionGEThan(3, 0);
caps_.cullDistanceSupported = gl_extensions.ARB_cull_distance;
}

// Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader):
// This will induce a performance penalty on many architectures though so a blanket enable of this
Expand Down Expand Up @@ -1162,7 +1169,7 @@ bool OpenGLPipeline::LinkShaders() {
std::vector<GLRProgram::Initializer> initialize;
for (int i = 0; i < MAX_TEXTURE_SLOTS; ++i)
initialize.push_back({ &samplerLocs_[i], 0, i });
program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false);
program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false);
return true;
}

Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,8 @@ void VulkanContext::ChooseDevice(int physical_device) {
deviceFeatures_.enabled.depthClamp = deviceFeatures_.available.depthClamp;
deviceFeatures_.enabled.depthBounds = deviceFeatures_.available.depthBounds;
deviceFeatures_.enabled.samplerAnisotropy = deviceFeatures_.available.samplerAnisotropy;
deviceFeatures_.enabled.shaderClipDistance = deviceFeatures_.available.shaderClipDistance;
deviceFeatures_.enabled.shaderCullDistance = deviceFeatures_.available.shaderCullDistance;
// For easy wireframe mode, someday.
deviceFeatures_.enabled.fillModeNonSolid = deviceFeatures_.available.fillModeNonSolid;

Expand Down
7 changes: 7 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,8 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
caps_.multiViewport = vulkan->GetDeviceFeatures().enabled.multiViewport != 0;
caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.dualSrcBlend != 0;
caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.depthClamp != 0;
caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderClipDistance != 0;
caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderCullDistance != 0;
caps_.framebufferBlitSupported = true;
caps_.framebufferCopySupported = true;
caps_.framebufferDepthBlitSupported = false; // Can be checked for.
Expand Down Expand Up @@ -816,6 +818,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
} else if (caps_.vendor == GPUVendor::VENDOR_INTEL) {
// Workaround for Intel driver bug. TODO: Re-enable after some driver version
bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN);
} else if (caps_.vendor == GPUVendor::VENDOR_ARM) {
// These GPUs (up to some certain hardware version?) have a bug where draws with gl_Position.w == gl_Position.z
// corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case
// is detected. See: https://github.com/hrydgard/ppsspp/issues/11937
bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);
}

caps_.deviceID = deviceProps.deviceID;
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ class Bugs {
BROKEN_NAN_IN_CONDITIONAL = 4,
COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5,
BROKEN_FLAT_IN_SHADER = 6,
EQUAL_WZ_CORRUPTS_DEPTH = 7,
};

protected:
Expand Down Expand Up @@ -520,6 +521,8 @@ struct DeviceCaps {
bool dualSourceBlend;
bool logicOpSupported;
bool depthClampSupported;
bool clipDistanceSupported;
bool cullDistanceSupported;
bool framebufferCopySupported;
bool framebufferBlitSupported;
bool framebufferDepthCopySupported;
Expand Down
6 changes: 5 additions & 1 deletion Common/UI/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ void UIContext::BeginNoTex() {

// Begins UI drawing with the given pipeline and sampler state.
// pipeline must not be null (asserted). samplerState goes into sampler slot 0;
// sampler slots 1-2 and texture slots 1-2 are bound to null so that state left
// bound by earlier drawing does not carry over into UI rendering.
void UIContext::BeginPipeline(Draw::Pipeline *pipeline, Draw::SamplerState *samplerState) {
	_assert_(pipeline != nullptr);
	// One three-slot bind covers slot 0, so no separate single-slot bind is needed.
	// Remaining array entries are value-initialized to nullptr.
	Draw::SamplerState *samplers[3]{ samplerState };
	draw_->BindSamplerStates(0, 3, samplers);
	// Also clear out any other textures bound.
	Draw::Texture *textures[2]{};
	draw_->BindTextures(1, 2, textures);
	RebindTexture();
	UIBegin(pipeline);
}
Expand Down
40 changes: 8 additions & 32 deletions GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,12 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return (pspViewport * heightScale) - yOffset;
};
auto reverseViewportZ = [hasNegZ](float z) {
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();

float scale, center;
if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
// These are just the reverse of the formulas in GPUStateUtils.
float halfActualZRange = vpZScale * (1.0f / gstate_c.vpDepthScale);
float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;

// In accurate depth mode, we're comparing against a value scaled to (minz, maxz).
// And minz might be very negative, (e.g. if we're clamping in that direction.)
scale = halfActualZRange;
center = minz + halfActualZRange;
} else {
// In old-style depth mode, we're comparing against a value scaled to viewport.
// (and possibly incorrectly clipped against it.)
scale = vpZScale;
center = vpZCenter;
auto transformZ = [hasNegZ](float z) {
// Z culling ignores the viewport, so we just redo the projection matrix adjustments.
if (hasNegZ) {
return (z * gstate_c.vpDepthScale) + gstate_c.vpZOffset;
}

float realViewport = (z - center) * (1.0f / scale);
return hasNegZ ? realViewport : (realViewport * 0.5f + 0.5f);
return (z * gstate_c.vpDepthScale * 0.5f) + gstate_c.vpZOffset * 0.5f + 0.5f;
};
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
Expand All @@ -75,7 +58,7 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo
// Any vertex outside this range (unless depth clamp enabled) is discarded.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
auto z = sortPair(transformZ(-1.000030517578125f), transformZ(1.000030517578125f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;

Expand Down Expand Up @@ -243,18 +226,11 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
float viewZScale = halfActualZRange * 2.0f;
// Account for the half pixel offset.
float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f;
float viewZInvScale;

if (viewZScale != 0.0) {
viewZInvScale = 1.0f / viewZScale;
} else {
viewZInvScale = 0.0;
}

ub->depthRange[0] = viewZScale;
ub->depthRange[1] = viewZCenter;
ub->depthRange[2] = viewZCenter;
ub->depthRange[3] = viewZInvScale;
ub->depthRange[2] = gstate_c.vpZOffset * 0.5f + 0.5f;
ub->depthRange[3] = 2.0f * (1.0f / gstate_c.vpDepthScale);
}

if (dirtyUniforms & DIRTY_CULLRANGE) {
Expand Down
35 changes: 22 additions & 13 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,8 @@ static void SwapUVs(TransformedVertex &a, TransformedVertex &b) {

// Note: 0 is BR and 2 is TL.

static void RotateUV(TransformedVertex v[4], float flippedMatrix[16], bool flippedY) {
// Transform these two coordinates to figure out whether they're flipped or not.
Vec4f tl;
Vec3ByMatrix44(tl.AsArray(), v[2].pos, flippedMatrix);

Vec4f br;
Vec3ByMatrix44(br.AsArray(), v[0].pos, flippedMatrix);

static void RotateUV(TransformedVertex v[4], Vec4f tl, Vec4f br, bool flippedY) {
// We use the transformed tl/br coordinates to figure out whether they're flipped or not.
float ySign = flippedY ? -1.0 : 1.0;

const float invtlw = 1.0f / tl.w;
Expand Down Expand Up @@ -438,7 +432,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt
// TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not.
// TODO: Allow creating a depth clear and a color draw.
bool reallyAClear = false;
if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear()) {
if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear() && throughmode) {
int scissorX2 = gstate.getScissorX2() + 1;
int scissorY2 = gstate.getScissorY2() + 1;
reallyAClear = IsReallyAClear(transformed, maxIndex, scissorX2, scissorY2);
Expand All @@ -465,7 +459,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt
}

// Detect full screen "clears" that might not be so obvious, to set the safe size if possible.
if (!result->setSafeSize && prim == GE_PRIM_RECTANGLES && maxIndex == 2) {
if (!result->setSafeSize && prim == GE_PRIM_RECTANGLES && maxIndex == 2 && throughmode) {
bool clearingColor = gstate.isModeClear() && (gstate.isClearModeColorMask() || gstate.isClearModeAlphaMask());
bool writingColor = gstate.getColorMask() != 0xFFFFFFFF;
bool startsZeroX = transformed[0].x <= 0.0f && transformed[1].x > 0.0f && transformed[1].x > transformed[0].x;
Expand Down Expand Up @@ -629,10 +623,25 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
trans[3].u = transVtxTL.u;

// That's the four corners. Now process UV rotation.
if (throughmode)
if (throughmode) {
RotateUVThrough(trans);
else
RotateUV(trans, flippedMatrix, flippedY);
} else {
Vec4f tl;
Vec3ByMatrix44(tl.AsArray(), transVtxTL.pos, flippedMatrix);
Vec4f br;
Vec3ByMatrix44(br.AsArray(), transVtxBR.pos, flippedMatrix);

// If both transformed verts are outside Z, cull this rectangle entirely.
constexpr float outsideValue = 1.000030517578125f;
bool tlOutside = fabsf(tl.z / tl.w) >= outsideValue;
bool brOutside = fabsf(br.z / br.w) >= outsideValue;
if (tlOutside && brOutside)
continue;
if (!gstate.isDepthClampEnabled() && (tlOutside || brOutside))
continue;

RotateUV(trans, tl, br, flippedY);
}

// Triangle: BR-TR-TL
indsOut[0] = i * 2 + 0;
Expand Down
Loading

0 comments on commit 16bf519

Please sign in to comment.