Skip to content

Commit

Permalink
Merge pull request #16142 from unknownbrackets/geo-shader
Browse files Browse the repository at this point in the history
Implement geometry shader for range culling
  • Loading branch information
hrydgard authored Oct 2, 2022
2 parents 33f24ef + 4df7a8f commit bc1975b
Show file tree
Hide file tree
Showing 33 changed files with 585 additions and 53 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1562,6 +1562,8 @@ set(GPU_SOURCES
GPU/Common/FragmentShaderGenerator.h
GPU/Common/VertexShaderGenerator.cpp
GPU/Common/VertexShaderGenerator.h
GPU/Common/GeometryShaderGenerator.cpp
GPU/Common/GeometryShaderGenerator.h
GPU/Common/FramebufferManagerCommon.cpp
GPU/Common/FramebufferManagerCommon.h
GPU/Common/GPUDebugInterface.cpp
Expand Down
2 changes: 0 additions & 2 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,6 @@ void VulkanContext::ChooseDevice(int physical_device) {
deviceFeatures_.enabled.samplerAnisotropy = deviceFeatures_.available.samplerAnisotropy;
deviceFeatures_.enabled.shaderClipDistance = deviceFeatures_.available.shaderClipDistance;
deviceFeatures_.enabled.shaderCullDistance = deviceFeatures_.available.shaderCullDistance;
// For easy wireframe mode, someday.
deviceFeatures_.enabled.fillModeNonSolid = deviceFeatures_.available.fillModeNonSolid;
deviceFeatures_.enabled.geometryShader = deviceFeatures_.available.geometryShader;

GetDeviceLayerExtensionList(nullptr, device_extension_properties_);
Expand Down
11 changes: 10 additions & 1 deletion Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
// Fill in the last part of the desc since now it's time to block.
VkShaderModule vs = desc->vertexShader->BlockUntilReady();
VkShaderModule fs = desc->fragmentShader->BlockUntilReady();
VkShaderModule gs = desc->geometryShader ? desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE;

if (!vs || !fs) {
if (!vs || !fs || (!gs && desc->geometryShader)) {
ERROR_LOG(G3D, "Failed creating graphics pipeline - missing shader modules");
// We're kinda screwed here?
return false;
Expand All @@ -49,6 +50,14 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
ss[1].pSpecializationInfo = nullptr;
ss[1].module = fs;
ss[1].pName = "main";
if (gs) {
stageCount++;
ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
ss[2].pSpecializationInfo = nullptr;
ss[2].module = gs;
ss[2].pName = "main";
}

VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };
pipe.pStages = ss;
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct VKRGraphicsPipelineDesc {
// Replaced the ShaderStageInfo with promises here so we can wait for compiles to finish.
Promise<VkShaderModule> *vertexShader = nullptr;
Promise<VkShaderModule> *fragmentShader = nullptr;
Promise<VkShaderModule> *geometryShader = nullptr;

VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };
VkVertexInputAttributeDescription attrs[8]{};
Expand Down
7 changes: 6 additions & 1 deletion Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ VkShaderStageFlagBits StageToVulkan(ShaderStage stage) {
case ShaderStage::Vertex: return VK_SHADER_STAGE_VERTEX_BIT;
case ShaderStage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
case ShaderStage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT;
default:
case ShaderStage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
}
return VK_SHADER_STAGE_FRAGMENT_BIT;
}

// Not registering this as a resource holder, instead the pipeline is registered. It will
Expand Down Expand Up @@ -846,6 +846,11 @@ VKContext::VKContext(VulkanContext *vulkan)
if (majorVersion >= 32) {
bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661
}

// Older ARM devices have very slow geometry shaders, not worth using. At least before 15.
if (majorVersion <= 15) {
bugs_.Infest(Bugs::GEOMETRY_SHADERS_SLOW);
}
}

// Limited, through input attachments and self-dependencies.
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,14 @@ class Bugs {
RASPBERRY_SHADER_COMP_HANG = 8,
MALI_CONSTANT_LOAD_BUG = 9,
SUBPASS_FEEDBACK_BROKEN = 10,
GEOMETRY_SHADERS_SLOW = 11,
MAX_BUG,
};

protected:
uint32_t flags_ = 0;

static_assert(sizeof(flags_) * 8 > MAX_BUG, "Ran out of space for bugs.");
};

class RefCountedObject {
Expand Down
1 change: 1 addition & 0 deletions Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,7 @@ static ConfigSetting graphicsSettings[] = {
#endif
ConfigSetting("CameraDevice", &g_Config.sCameraDevice, "", true, false),
ConfigSetting("VendorBugChecksEnabled", &g_Config.bVendorBugChecksEnabled, true, false, false),
ConfigSetting("UseGeometryShader", &g_Config.bUseGeometryShader, true, true, true),
ReportedConfigSetting("RenderingMode", &g_Config.iRenderingMode, 1, true, true),
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
Expand Down
1 change: 1 addition & 0 deletions Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ struct Config {
bool bHardwareTransform; // only used in the GLES backend
bool bSoftwareSkinning; // may speed up some games
bool bVendorBugChecksEnabled;
bool bUseGeometryShader;

int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering
int iTexFiltering; // 1 = auto , 2 = nearest , 3 = linear , 4 = auto max quality
Expand Down
134 changes: 134 additions & 0 deletions GPU/Common/GeometryShaderGenerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <cstdio>
#include <cstdlib>
#include <locale.h>

#include "Common/StringUtils.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/GPU/thin3d.h"
#include "Core/Config.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/GeometryShaderGenerator.h"

#undef WRITE

#define WRITE(p, ...) p.F(__VA_ARGS__)

// TODO: Could support VK_NV_geometry_shader_passthrough, though the hardware that supports
// it is already pretty fast at geometry shaders..


// Writes the source of the range-culling geometry shader into `buffer`.
//
// The generated shader takes one triangle in and emits at most one triangle out. It first
// tests every input vertex against u_cullRangeMin/u_cullRangeMax and returns early (emitting
// nothing) when the triangle should be culled; otherwise it re-emits the three vertices,
// copying each input varying to its "...Out" counterpart.
//
//   id          - GS_BIT_DO_TEXTURE and GS_BIT_LMODE select which optional varyings are declared.
//   buffer      - destination handed to ShaderWriter.
//   compat      - shaderLanguage selects the uniform block declaration and the gl_ClipDistance form.
//   bugs        - not consulted by this function.
//   errorString - not written by this function.
//
// Always returns true.
bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) {
	std::vector<const char*> gl_exts;
	if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
		if (gl_extensions.EXT_gpu_shader4) {
			gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable");
		}
	}

	ShaderWriter p(buffer, compat, ShaderStage::Geometry, gl_exts.data(), gl_exts.size());
	p.C("layout(triangles) in;\n");
	p.C("layout(triangle_strip, max_vertices = 3) out;\n");

	// Declare the base uniform block in the form the target language expects.
	if (compat.shaderLanguage == GLSL_VULKAN) {
		WRITE(p, "\n");
		WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
	} else if (compat.shaderLanguage == HLSL_D3D11) {
		WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
	}

	// Inputs and outputs are kept as parallel lists: entry i of varyings is copied
	// to entry i of outVaryings for every emitted vertex.
	std::vector<VaryingDef> varyings, outVaryings;

	if (id.Bit(GS_BIT_DO_TEXTURE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_texcoordOut", Draw::SEM_TEXCOORD0, 0, "highp" });
	}
	varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" });
	outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" });
	if (id.Bit(GS_BIT_LMODE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });
	}
	varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" });
	outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" });

	p.BeginGSMain(varyings, outVaryings);

	// Apply culling
	p.C(" bool anyInside = false;\n");

	p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
	p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
	p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
	p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
	// Vertex range culling doesn't happen when Z clips, note sign of w is important.
	p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
	const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
	const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
	p.F(" if ((%s) || (%s)) {\n", outMin, outMax);
	p.C(" return;\n"); // Cull!
	p.C(" }\n");
	p.C(" }\n");
	p.C(" if (u_cullRangeMin.w <= 0.0) {\n");
	p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");
	// When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.)
	p.C(" return;\n");
	p.C(" }\n");
	p.C(" } else {\n");
	// NOTE(review): both tests set the same flag, so a vertex past one bound still sets
	// anyInside through the other test (assuming min <= max). Confirm this is the intended
	// approximation of per-plane cull distances before relying on the check below.
	p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");
	p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");
	p.C(" }\n");
	p.C(" } // for\n");

	// Cull any triangle fully outside in the same direction when depth clamp enabled.
	// Basically simulate cull distances.
	p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");
	p.C(" return;\n");
	p.C(" }\n");

	// HLSL_D3D11 gets the clip distance written without an index; other languages use element [0].
	const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]";

	p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
	p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
	p.C(" gl_Position = outPos;\n");
	// TODO: Not rectangles...
	if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
		p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
		p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
		p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0);
	}

	// Pass every varying through unchanged for the current vertex.
	for (size_t i = 0; i < varyings.size(); i++) {
		p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name);
	}
	// Debug - null the red channel
	//p.C(" if (i == 0) v_color0Out.x = 0.0;\n");
	p.C(" EmitVertex();\n");
	p.C(" }\n");

	p.EndGSMain();

	return true;
}
5 changes: 5 additions & 0 deletions GPU/Common/GeometryShaderGenerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#pragma once

#include "GPU/Common/ShaderId.h"

bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString);
3 changes: 2 additions & 1 deletion GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@ enum : uint64_t {
DIRTY_VIEWPORTSCISSOR_STATE = 1ULL << 46,
DIRTY_VERTEXSHADER_STATE = 1ULL << 47,
DIRTY_FRAGMENTSHADER_STATE = 1ULL << 48,
DIRTY_GEOMETRYSHADER_STATE = 1ULL << 49,

// Everything that's not uniforms. Use this after using thin3d.
// TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself.
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,

// Note that the top 8 bits (54-63) cannot be dirtied through the commonCommandTable due to packing of other flags.

Expand Down
39 changes: 39 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,42 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip

*id_out = id;
}

// Returns a human-readable description of a geometry shader ID: the two ID words
// in hex (d[1] first), followed by a space-terminated tag for each bit that is set.
std::string GeometryShaderDesc(const GShaderID &id) {
	std::string text = StringFromFormat("%08x:%08x ", id.d[1], id.d[0]);
	if (id.Bit(GS_BIT_ENABLED)) {
		text += "ENABLED ";
	}
	if (id.Bit(GS_BIT_DO_TEXTURE)) {
		text += "TEX ";
	}
	if (id.Bit(GS_BIT_LMODE)) {
		text += "LMODE ";
	}
	return text;
}

// Computes the geometry shader ID for the current GPU state and the given primitive type.
//
// The ID is left all-zero (no geometry shader) unless vertex range culling applies,
// GPU_SUPPORTS_GS_CULLING is available, and prim is one of the triangle primitives.
// Otherwise GS_BIT_ENABLED is set and, outside of clear mode, the attribute bits
// are filled in from gstate. `bugs` is not consulted here.
void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim) {
	GShaderID id;

	// neither hw nor sw spline/bezier. See #11692
	const bool vertexRangeCulling = !gstate.isModeThrough() && gstate_c.submitType == SubmitType::DRAW;

	// Only use this for triangle primitives.
	const bool trianglePrim = prim == GE_PRIM_TRIANGLES || prim == GE_PRIM_TRIANGLE_FAN || prim == GE_PRIM_TRIANGLE_STRIP;

	// If we're not using GS culling, the ID stays zero.
	if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_GS_CULLING) && trianglePrim) {
		id.SetBit(GS_BIT_ENABLED, true);

		// Clear mode gets no attribute bits.
		if (!gstate.isModeClear()) {
			const bool throughMode = gstate.isModeThrough();
			const bool useSpecular = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !throughMode;
			id.SetBit(GS_BIT_LMODE, useSpecular);

			if (gstate.isTextureMapEnabled()) {
				id.SetBit(GS_BIT_DO_TEXTURE);
			}
		}
	}

	*id_out = id;
}
39 changes: 39 additions & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ static inline FShaderBit operator +(FShaderBit bit, int i) {
return FShaderBit((int)bit + i);
}

// Bit positions used in a geometry shader ID.
// Some of these bits are straight from FShaderBit, since they essentially enable attributes directly.
enum GShaderBit : uint8_t {
	// If not set, we don't use a geo shader.
	GS_BIT_ENABLED = 0,
	// presence of texcoords
	GS_BIT_DO_TEXTURE = 1,
	// presence of specular color (regular color always present)
	GS_BIT_LMODE = 2,
};

// Offsets a bit position by i while keeping the GShaderBit type.
static inline GShaderBit operator +(GShaderBit bit, int i) {
	const int shifted = static_cast<int>(bit) + i;
	return static_cast<GShaderBit>(shifted);
}

struct ShaderID {
ShaderID() {
clear();
Expand Down Expand Up @@ -232,6 +243,31 @@ struct FShaderID : ShaderID {
}
};

// Shader ID for geometry shaders. Wraps the ShaderID bit accessors so that
// they take GShaderBit positions instead of plain ints.
struct GShaderID : ShaderID {
	GShaderID() : ShaderID() {
	}

	// Copies the raw ID words from a generic ShaderID.
	explicit GShaderID(ShaderID &src) {
		memcpy(d, src.d, sizeof(d));
	}

	// Reads a single bit.
	bool Bit(GShaderBit bit) const {
		const int index = static_cast<int>(bit);
		return ShaderID::Bit(index);
	}

	// Reads `count` bits starting at `bit`.
	int Bits(GShaderBit bit, int count) const {
		const int index = static_cast<int>(bit);
		return ShaderID::Bits(index, count);
	}

	// Forwards to ShaderID::SetBit for a single bit.
	void SetBit(GShaderBit bit, bool value = true) {
		const int index = static_cast<int>(bit);
		ShaderID::SetBit(index, value);
	}

	// Forwards to ShaderID::SetBits for `count` bits starting at `bit`.
	void SetBits(GShaderBit bit, int count, int value) {
		const int index = static_cast<int>(bit);
		ShaderID::SetBits(index, count, value);
	}
};

namespace Draw {
class Bugs;
}
Expand All @@ -244,3 +280,6 @@ std::string VertexShaderDesc(const VShaderID &id);
struct ComputedPipelineState;
void ComputeFragmentShaderID(FShaderID *id, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs);
std::string FragmentShaderDesc(const FShaderID &id);

void ComputeGeometryShaderID(GShaderID *id, const Draw::Bugs &bugs, int prim);
std::string GeometryShaderDesc(const GShaderID &id);
2 changes: 1 addition & 1 deletion GPU/Common/VertexShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
WRITE(p, " if (%s || %s) {\n", outMin, outMax);
WRITE(p, " if ((%s) || (%s)) {\n", outMin, outMax);
WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPU.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="Common\TextureShaderCommon.h" />
<ClInclude Include="Common\Draw2D.h" />
<ClInclude Include="Common\GeometryShaderGenerator.h" />
<ClInclude Include="Common\ReinterpretFramebuffer.h" />
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
<ClInclude Include="Common\DrawEngineCommon.h" />
Expand Down Expand Up @@ -455,6 +456,7 @@
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="Common\TextureShaderCommon.cpp" />
<ClCompile Include="Common\Draw2D.cpp" />
<ClCompile Include="Common\GeometryShaderGenerator.cpp" />
<ClCompile Include="Common\ReinterpretFramebuffer.cpp" />
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
<ClCompile Include="Common\DrawEngineCommon.cpp" />
Expand Down
Loading

0 comments on commit bc1975b

Please sign in to comment.