Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement geometry shader for range culling #16142

Merged
merged 12 commits into from
Oct 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1562,6 +1562,8 @@ set(GPU_SOURCES
GPU/Common/FragmentShaderGenerator.h
GPU/Common/VertexShaderGenerator.cpp
GPU/Common/VertexShaderGenerator.h
GPU/Common/GeometryShaderGenerator.cpp
GPU/Common/GeometryShaderGenerator.h
GPU/Common/FramebufferManagerCommon.cpp
GPU/Common/FramebufferManagerCommon.h
GPU/Common/GPUDebugInterface.cpp
Expand Down
2 changes: 0 additions & 2 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,6 @@ void VulkanContext::ChooseDevice(int physical_device) {
deviceFeatures_.enabled.samplerAnisotropy = deviceFeatures_.available.samplerAnisotropy;
deviceFeatures_.enabled.shaderClipDistance = deviceFeatures_.available.shaderClipDistance;
deviceFeatures_.enabled.shaderCullDistance = deviceFeatures_.available.shaderCullDistance;
// For easy wireframe mode, someday.
deviceFeatures_.enabled.fillModeNonSolid = deviceFeatures_.available.fillModeNonSolid;
deviceFeatures_.enabled.geometryShader = deviceFeatures_.available.geometryShader;

GetDeviceLayerExtensionList(nullptr, device_extension_properties_);
Expand Down
11 changes: 10 additions & 1 deletion Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
// Fill in the last part of the desc since now it's time to block.
VkShaderModule vs = desc->vertexShader->BlockUntilReady();
VkShaderModule fs = desc->fragmentShader->BlockUntilReady();
VkShaderModule gs = desc->geometryShader ? desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE;

if (!vs || !fs) {
if (!vs || !fs || (!gs && desc->geometryShader)) {
ERROR_LOG(G3D, "Failed creating graphics pipeline - missing shader modules");
// We're kinda screwed here?
return false;
Expand All @@ -49,6 +50,14 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
ss[1].pSpecializationInfo = nullptr;
ss[1].module = fs;
ss[1].pName = "main";
if (gs) {
stageCount++;
ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
ss[2].pSpecializationInfo = nullptr;
ss[2].module = gs;
ss[2].pName = "main";
}

VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };
pipe.pStages = ss;
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct VKRGraphicsPipelineDesc {
// Replaced the ShaderStageInfo with promises here so we can wait for compiles to finish.
Promise<VkShaderModule> *vertexShader = nullptr;
Promise<VkShaderModule> *fragmentShader = nullptr;
Promise<VkShaderModule> *geometryShader = nullptr;

VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };
VkVertexInputAttributeDescription attrs[8]{};
Expand Down
7 changes: 6 additions & 1 deletion Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ VkShaderStageFlagBits StageToVulkan(ShaderStage stage) {
case ShaderStage::Vertex: return VK_SHADER_STAGE_VERTEX_BIT;
case ShaderStage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
case ShaderStage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT;
default:
case ShaderStage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
}
return VK_SHADER_STAGE_FRAGMENT_BIT;
}

// Not registering this as a resource holder, instead the pipeline is registered. It will
Expand Down Expand Up @@ -846,6 +846,11 @@ VKContext::VKContext(VulkanContext *vulkan)
if (majorVersion >= 32) {
bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661
}

// Older ARM devices have very slow geometry shaders, not worth using. At least before 15.
if (majorVersion <= 15) {
bugs_.Infest(Bugs::GEOMETRY_SHADERS_SLOW);
}
}

// Limited, through input attachments and self-dependencies.
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,14 @@ class Bugs {
RASPBERRY_SHADER_COMP_HANG = 8,
MALI_CONSTANT_LOAD_BUG = 9,
SUBPASS_FEEDBACK_BROKEN = 10,
GEOMETRY_SHADERS_SLOW = 11,
MAX_BUG,
};

protected:
uint32_t flags_ = 0;

static_assert(sizeof(flags_) * 8 > MAX_BUG, "Ran out of space for bugs.");
};

class RefCountedObject {
Expand Down
1 change: 1 addition & 0 deletions Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,7 @@ static ConfigSetting graphicsSettings[] = {
#endif
ConfigSetting("CameraDevice", &g_Config.sCameraDevice, "", true, false),
ConfigSetting("VendorBugChecksEnabled", &g_Config.bVendorBugChecksEnabled, true, false, false),
ConfigSetting("UseGeometryShader", &g_Config.bUseGeometryShader, true, true, true),
ReportedConfigSetting("RenderingMode", &g_Config.iRenderingMode, 1, true, true),
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
Expand Down
1 change: 1 addition & 0 deletions Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ struct Config {
bool bHardwareTransform; // only used in the GLES backend
bool bSoftwareSkinning; // may speed up some games
bool bVendorBugChecksEnabled;
bool bUseGeometryShader;

int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering
int iTexFiltering; // 1 = auto , 2 = nearest , 3 = linear , 4 = auto max quality
Expand Down
134 changes: 134 additions & 0 deletions GPU/Common/GeometryShaderGenerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <cstdio>
#include <cstdlib>
#include <locale.h>

#include "Common/StringUtils.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/GPU/thin3d.h"
#include "Core/Config.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/GeometryShaderGenerator.h"

#undef WRITE

#define WRITE(p, ...) p.F(__VA_ARGS__)

// TODO: Could support VK_NV_geometry_shader_passthrough, though the hardware that supports
// it is already pretty fast at geometry shaders..


// Generates the source of a pass-through geometry shader that applies range culling to
// whole triangles, writing the text into `buffer` through a ShaderWriter.
//
// id          - GS_BIT_DO_TEXTURE and GS_BIT_LMODE select which optional varyings
//               (texcoord, secondary color) are forwarded. Primary color and fog depth
//               are always forwarded.
// buffer      - destination for the generated text. No size is passed or checked here.
// compat      - target shader language. Decides how the base uniform block is declared
//               and how gl_ClipDistance is spelled.
// bugs        - not consulted by this function.
// errorString - never written by this function.
//
// Besides `id`, the output also depends on the globals gl_extensions and gstate_c.
// Always returns true.
bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) {
	std::vector<const char*> gl_exts;
	if (ShaderLanguageIsOpenGL(compat.shaderLanguage) && gl_extensions.EXT_gpu_shader4) {
		gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable");
	}

	ShaderWriter p(buffer, compat, ShaderStage::Geometry, gl_exts.data(), gl_exts.size());
	// One triangle in, at most one triangle out: the shader only ever passes through or drops.
	p.C("layout(triangles) in;\n");
	p.C("layout(triangle_strip, max_vertices = 3) out;\n");

	// Declare the base uniform block in the form the target language expects.
	if (compat.shaderLanguage == GLSL_VULKAN) {
		p.F("\n");
		p.F("layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
	} else if (compat.shaderLanguage == HLSL_D3D11) {
		p.F("cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
	}

	// Inputs and outputs are pushed in matching pairs, so varyings[i] always
	// corresponds to outVaryings[i] in the copy loop further down.
	std::vector<VaryingDef> varyings, outVaryings;

	if (id.Bit(GS_BIT_DO_TEXTURE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_texcoordOut", Draw::SEM_TEXCOORD0, 0, "highp" });
	}
	varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" });
	outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" });
	if (id.Bit(GS_BIT_LMODE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });
	}
	varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" });
	outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" });

	p.BeginGSMain(varyings, outVaryings);

	// Apply culling
	p.C(" bool anyInside = false;\n");

	p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
	p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
	p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
	p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
	// Vertex range culling doesn't happen when Z clips, note sign of w is important.
	p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
	const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
	const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
	p.F(" if ((%s) || (%s)) {\n", outMin, outMax);
	p.C(" return;\n"); // Cull!
	p.C(" }\n");
	p.C(" }\n");
	p.C(" if (u_cullRangeMin.w <= 0.0) {\n");
	p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");
	// When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.)
	p.C(" return;\n");
	p.C(" }\n");
	p.C(" } else {\n");
	// NOTE(review): both comparisons set the same flag, so a vertex that is beyond only one
	// bound still marks the triangle as inside. Confirm whether separate min/max tracking
	// was intended to match cull distance behavior.
	p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");
	p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");
	p.C(" }\n");
	p.C(" } // for\n");

	// Cull any triangle fully outside in the same direction when depth clamp enabled.
	// Basically simulate cull distances.
	p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");
	p.C(" return;\n");
	p.C(" }\n");

	// gl_ClipDistance is written with an index except when targeting D3D11 HLSL.
	const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]";

	// The triangle survived culling: re-emit its three vertices unchanged.
	p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
	p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
	p.C(" gl_Position = outPos;\n");
	// TODO: Not rectangles...
	if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
		p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
		p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
		p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0);
	}

	// Copy each input varying of vertex i to its paired output.
	for (size_t i = 0; i < varyings.size(); i++) {
		p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name);
	}
	// Debug - null the red channel
	//p.C(" if (i == 0) v_color0Out.x = 0.0;\n");
	p.C(" EmitVertex();\n");
	p.C(" }\n");

	p.EndGSMain();

	return true;
}
5 changes: 5 additions & 0 deletions GPU/Common/GeometryShaderGenerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#pragma once

#include "GPU/Common/ShaderId.h"

// Writes the source of the range-culling geometry shader described by `id` into `buffer`,
// targeting the shader language given by `compat`.
// NOTE(review): no buffer size is passed, so the caller presumably must supply a buffer
// large enough for the generated text - confirm against callers.
bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString);
3 changes: 2 additions & 1 deletion GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@ enum : uint64_t {
DIRTY_VIEWPORTSCISSOR_STATE = 1ULL << 46,
DIRTY_VERTEXSHADER_STATE = 1ULL << 47,
DIRTY_FRAGMENTSHADER_STATE = 1ULL << 48,
DIRTY_GEOMETRYSHADER_STATE = 1ULL << 49,

// Everything that's not uniforms. Use this after using thin3d.
// TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself.
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,

// Note that the top 8 bits (54-63) cannot be dirtied through the commonCommandTable due to packing of other flags.

Expand Down
39 changes: 39 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,42 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip

*id_out = id;
}

std::string GeometryShaderDesc(const GShaderID &id) {
std::stringstream desc;
desc << StringFromFormat("%08x:%08x ", id.d[1], id.d[0]);
if (id.Bit(GS_BIT_ENABLED)) desc << "ENABLED ";
if (id.Bit(GS_BIT_DO_TEXTURE)) desc << "TEX ";
if (id.Bit(GS_BIT_LMODE)) desc << "LMODE ";
return desc.str();
}

// Fills *id_out with the geometry shader ID for the current GPU state and primitive type.
// The ID is left default-constructed (no bits set here) when geometry shader culling does
// not apply. `bugs` is not consulted by this function.
void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim) {
	GShaderID result;

	// Range culling applies neither in through mode nor to hw/sw spline/bezier draws. See #11692
	const bool rangeCullingActive =
		!gstate.isModeThrough() && gstate_c.submitType == SubmitType::DRAW;

	// The geometry shader path is only used for triangle primitives, and only when
	// the backend reports support for GS culling.
	const bool isTrianglePrim =
		prim == GE_PRIM_TRIANGLES || prim == GE_PRIM_TRIANGLE_FAN || prim == GE_PRIM_TRIANGLE_STRIP;
	const bool useGeometryShader =
		rangeCullingActive && gstate_c.Supports(GPU_SUPPORTS_GS_CULLING) && isTrianglePrim;

	if (!useGeometryShader) {
		// Not using GS culling: hand back the untouched ID.
		*id_out = result;
		return;
	}

	result.SetBit(GS_BIT_ENABLED, true);

	// Clear mode contributes no attribute bits.
	if (!gstate.isModeClear()) {
		const bool throughMode = gstate.isModeThrough();
		const bool secondaryColor =
			gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !throughMode;
		result.SetBit(GS_BIT_LMODE, secondaryColor);

		if (gstate.isTextureMapEnabled()) {
			result.SetBit(GS_BIT_DO_TEXTURE);
		}
	}

	*id_out = result;
}
39 changes: 39 additions & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ static inline FShaderBit operator +(FShaderBit bit, int i) {
return FShaderBit((int)bit + i);
}

// Bit positions used inside a GShaderID.
// Some of them mirror FShaderBit, since they essentially enable attributes directly.
enum GShaderBit : uint8_t {
	// When clear, no geometry shader is used.
	GS_BIT_ENABLED = 0,
	// Texture coordinates are present.
	GS_BIT_DO_TEXTURE = 1,
	// Specular (secondary) color is present; the regular color is always present.
	GS_BIT_LMODE = 2,
};

// Returns the bit position `i` places after `bit`.
static inline GShaderBit operator +(GShaderBit bit, int i) {
	const int shifted = static_cast<int>(bit) + i;
	return static_cast<GShaderBit>(shifted);
}

struct ShaderID {
ShaderID() {
clear();
Expand Down Expand Up @@ -232,6 +243,31 @@ struct FShaderID : ShaderID {
}
};

// Geometry shader ID: a ShaderID whose bits are addressed with GShaderBit positions.
struct GShaderID : ShaderID {
	GShaderID() : ShaderID() {
	}

	// Copies the raw ID words from a generic ShaderID.
	explicit GShaderID(ShaderID &src) {
		memcpy(d, src.d, sizeof(d));
	}

	// The accessors below forward to the ShaderID versions, converting the
	// GShaderBit position to a plain int.

	bool Bit(GShaderBit bit) const {
		const int pos = static_cast<int>(bit);
		return ShaderID::Bit(pos);
	}

	int Bits(GShaderBit bit, int count) const {
		const int pos = static_cast<int>(bit);
		return ShaderID::Bits(pos, count);
	}

	void SetBit(GShaderBit bit, bool value = true) {
		const int pos = static_cast<int>(bit);
		ShaderID::SetBit(pos, value);
	}

	void SetBits(GShaderBit bit, int count, int value) {
		const int pos = static_cast<int>(bit);
		ShaderID::SetBits(pos, count, value);
	}
};

namespace Draw {
class Bugs;
}
Expand All @@ -244,3 +280,6 @@ std::string VertexShaderDesc(const VShaderID &id);
struct ComputedPipelineState;
void ComputeFragmentShaderID(FShaderID *id, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs);
std::string FragmentShaderDesc(const FShaderID &id);

// Computes the geometry shader ID for the current GPU state and the given primitive type.
// The result is written to *id.
void ComputeGeometryShaderID(GShaderID *id, const Draw::Bugs &bugs, int prim);
// Returns a human-readable description of a geometry shader ID (hex words plus flag names).
std::string GeometryShaderDesc(const GShaderID &id);
2 changes: 1 addition & 1 deletion GPU/Common/VertexShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
WRITE(p, " if (%s || %s) {\n", outMin, outMax);
WRITE(p, " if ((%s) || (%s)) {\n", outMin, outMax);
WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPU.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="Common\TextureShaderCommon.h" />
<ClInclude Include="Common\Draw2D.h" />
<ClInclude Include="Common\GeometryShaderGenerator.h" />
<ClInclude Include="Common\ReinterpretFramebuffer.h" />
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
<ClInclude Include="Common\DrawEngineCommon.h" />
Expand Down Expand Up @@ -455,6 +456,7 @@
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="Common\TextureShaderCommon.cpp" />
<ClCompile Include="Common\Draw2D.cpp" />
<ClCompile Include="Common\GeometryShaderGenerator.cpp" />
<ClCompile Include="Common\ReinterpretFramebuffer.cpp" />
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
<ClCompile Include="Common\DrawEngineCommon.cpp" />
Expand Down
Loading