From 878a049f602d95fb287bd00586481b37f5c4cb3c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 1 Oct 2022 19:22:16 -0700 Subject: [PATCH 01/12] GPU: Add dirtying for geo shader state. Not yet used, but dirtied at the right times. --- GPU/Common/ShaderCommon.h | 3 ++- GPU/GPUCommon.cpp | 20 ++++++++++---------- GPU/GPUCommon.h | 4 ++-- GPU/Vulkan/DrawEngineVulkan.cpp | 4 ++-- GPU/Vulkan/ShaderManagerVulkan.cpp | 8 ++++---- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index d6dfbbc3f81a..63e31160145d 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -109,10 +109,11 @@ enum : uint64_t { DIRTY_VIEWPORTSCISSOR_STATE = 1ULL << 46, DIRTY_VERTEXSHADER_STATE = 1ULL << 47, DIRTY_FRAGMENTSHADER_STATE = 1ULL << 48, + DIRTY_GEOMETRYSHADER_STATE = 1ULL << 49, // Everything that's not uniforms. Use this after using thin3d. // TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself. - DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS, + DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS, // Note that the top 8 bits (54-63) cannot be dirtied through the commonCommandTable due to packing of other flags. diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 2ebdf3ab0587..a44fb47572c4 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -87,8 +87,8 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_FOG2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOEF }, // These affect the fragment shader so need flushing. 
- { GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, - { GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE }, + { GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE }, { GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, @@ -102,7 +102,7 @@ const CommonCommandTableEntry commonCommandTable[] = { // These change the vertex shader so need flushing. { GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, - { GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE }, { GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, @@ -114,7 +114,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_MATERIALUPDATE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, // These change both shaders so need flushing. 
- { GE_CMD_LIGHTMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_LIGHTMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE }, { GE_CMD_TEXFILTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXWRAP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE }, @@ -1683,7 +1683,7 @@ void GPUCommon::Execute_VertexType(u32 op, u32 diff) { if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) { gstate_c.Dirty(DIRTY_UVSCALEOFFSET); if (diff & GE_VTYPE_THROUGH_MASK) - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_CULLRANGE); + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); } } @@ -1710,7 +1710,7 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); } if (diff & GE_VTYPE_THROUGH_MASK) - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_CULLRANGE); + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); } void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) { @@ -2044,8 +2044,8 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType)); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); gstate_c.submitType = SubmitType::HW_BEZIER; if (gstate_c.spline_num_points_u != surface.num_points_u) { gstate_c.Dirty(DIRTY_BEZIERSPLINE); @@ -2059,7 +2059,7 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, 
&bytesRead, "bezier"); - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); gstate_c.submitType = SubmitType::DRAW; // After drawing, we advance pointers - see SubmitPrim which does the same. @@ -2119,8 +2119,8 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType)); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); gstate_c.submitType = SubmitType::HW_SPLINE; if (gstate_c.spline_num_points_u != surface.num_points_u) { gstate_c.Dirty(DIRTY_BEZIERSPLINE); @@ -2134,7 +2134,7 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); gstate_c.submitType = SubmitType::DRAW; // After drawing, we advance pointers - see SubmitPrim which does the same. diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 1b4eb0494da4..78f7b15ceff5 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -269,14 +269,14 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface { void SetDrawType(DrawType type, GEPrimitiveType prim) { if (type != lastDraw_) { // We always flush when drawing splines/beziers so no need to do so here - gstate_c.Dirty(DIRTY_UVSCALEOFFSET | DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_UVSCALEOFFSET | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); lastDraw_ = type; } // Prim == RECTANGLES can cause CanUseHardwareTransform to flip, so we need to dirty. // Also, culling may be affected so dirty the raster state. 
if (IsTrianglePrim(prim) != IsTrianglePrim(lastPrim_)) { Flush(); - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); lastPrim_ = prim; } } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index d086b60921c3..703ada14b4a7 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -770,7 +770,7 @@ void DrawEngineVulkan::DoFlush() { sampler = nullSampler_; } - if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) { + if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) { if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); } @@ -901,7 +901,7 @@ void DrawEngineVulkan::DoFlush() { if (sampler == VK_NULL_HANDLE) sampler = nullSampler_; } - if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) { + if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) { if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) { ConvertStateToVulkanKey(*framebufferManager_, 
shaderManager_, prim, pipelineKey_, dynState_); } diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 6c39280a8278..763315915462 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -128,7 +128,7 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, const char *code, bool useHWTransform) : vulkan_(vulkan), useHWTransform_(useHWTransform), id_(id) { source_ = code; - module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id).c_str())); + module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id))); if (!module_) { failed_ = true; } else { @@ -195,13 +195,13 @@ void ShaderManagerVulkan::Clear() { vsCache_.Clear(); lastFSID_.set_invalid(); lastVSID_.set_invalid(); - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } void ShaderManagerVulkan::ClearShaders() { Clear(); DirtyShader(); - gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); + gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } void ShaderManagerVulkan::DirtyShader() { @@ -214,7 +214,7 @@ void ShaderManagerVulkan::DirtyShader() { void ShaderManagerVulkan::DirtyLastShader() { lastVShader_ = nullptr; lastFShader_ = nullptr; - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) { From 38e16324f0014549f148fd13102ff9f7d0d64ffb Mon Sep 17 00:00:00 2001 From: "Unknown W. 
Brackets" Date: Sat, 1 Oct 2022 14:57:00 -0700 Subject: [PATCH 02/12] Vulkan: Clean up shader module tag. --- Common/GPU/Vulkan/thin3d_vulkan.cpp | 2 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 35d620be3cba..413057b28846 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -177,9 +177,9 @@ VkShaderStageFlagBits StageToVulkan(ShaderStage stage) { case ShaderStage::Vertex: return VK_SHADER_STAGE_VERTEX_BIT; case ShaderStage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; case ShaderStage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; - default: case ShaderStage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; } + return VK_SHADER_STAGE_FRAGMENT_BIT; } // Not registering this as a resource holder, instead the pipeline is registered. It will diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 763315915462..bfb5547b6aa3 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -73,14 +73,22 @@ static Promise *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderModule shaderModule = VK_NULL_HANDLE; if (success) { - success = vulkan->CreateShaderModule(spirv, &shaderModule, stage == VK_SHADER_STAGE_VERTEX_BIT ? "game_vertex" : "game_fragment"); + const char *createTag = tag ? 
tag->c_str() : nullptr; + if (!createTag) { + switch (stage) { + case VK_SHADER_STAGE_VERTEX_BIT: createTag = "game_vertex"; break; + case VK_SHADER_STAGE_FRAGMENT_BIT: createTag = "game_fragment"; break; + case VK_SHADER_STAGE_GEOMETRY_BIT: createTag = "game_geometry"; break; + case VK_SHADER_STAGE_COMPUTE_BIT: createTag = "game_compute"; break; + } + } + + success = vulkan->CreateShaderModule(spirv, &shaderModule, createTag); #ifdef SHADERLOG OutputDebugStringA("OK"); #endif - if (tag) { - vulkan->SetDebugName(shaderModule, VK_OBJECT_TYPE_SHADER_MODULE, tag->c_str()); + if (tag) delete tag; - } } return shaderModule; From d16caa71af444ee089a9ac74ce76fa3176eb3867 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 1 Oct 2022 20:01:23 -0700 Subject: [PATCH 03/12] Vulkan: Add geometry shader ID tracking. We're still not generating them, yet. But this tracks the objects and IDs through the pipeline. --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 11 ++- Common/GPU/Vulkan/VulkanRenderManager.h | 1 + GPU/Common/ShaderId.cpp | 39 ++++++++++ GPU/Common/ShaderId.h | 39 ++++++++++ GPU/GPUState.h | 3 +- GPU/Vulkan/DrawEngineVulkan.cpp | 13 ++-- GPU/Vulkan/DrawEngineVulkan.h | 4 +- GPU/Vulkan/GPU_Vulkan.cpp | 1 + GPU/Vulkan/PipelineManagerVulkan.cpp | 21 +++-- GPU/Vulkan/PipelineManagerVulkan.h | 4 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 95 +++++++++++++++++++++-- GPU/Vulkan/ShaderManagerVulkan.h | 32 +++++++- UI/DevScreens.cpp | 2 +- 13 files changed, 241 insertions(+), 24 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 4e047bf64e3f..344b059bbcf2 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -30,8 +30,9 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR // Fill in the last part of the desc since now it's time to block. 
VkShaderModule vs = desc->vertexShader->BlockUntilReady(); VkShaderModule fs = desc->fragmentShader->BlockUntilReady(); + VkShaderModule gs = desc->geometryShader ? desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE; - if (!vs || !fs) { + if (!vs || !fs || (!gs && desc->geometryShader)) { ERROR_LOG(G3D, "Failed creating graphics pipeline - missing shader modules"); // We're kinda screwed here? return false; @@ -49,6 +50,14 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR ss[1].pSpecializationInfo = nullptr; ss[1].module = fs; ss[1].pName = "main"; + if (gs) { + stageCount++; + ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + ss[2].pSpecializationInfo = nullptr; + ss[2].module = gs; + ss[2].pName = "main"; + } VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO }; pipe.pStages = ss; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 66692d99a7d4..7fd45e22463f 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -128,6 +128,7 @@ struct VKRGraphicsPipelineDesc { // Replaced the ShaderStageInfo with promises here so we can wait for compiles to finish. 
Promise *vertexShader = nullptr; Promise *fragmentShader = nullptr; + Promise *geometryShader = nullptr; VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO }; VkVertexInputAttributeDescription attrs[8]{}; diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index bb8dd542374c..eccd5d158f6e 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -366,3 +366,42 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip *id_out = id; } + +std::string GeometryShaderDesc(const GShaderID &id) { + std::stringstream desc; + desc << StringFromFormat("%08x:%08x ", id.d[1], id.d[0]); + if (id.Bit(GS_BIT_ENABLED)) desc << "ENABLED "; + if (id.Bit(GS_BIT_DO_TEXTURE)) desc << "TEX "; + if (id.Bit(GS_BIT_LMODE)) desc << "LMODE "; + return desc.str(); +} + +void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim) { + GShaderID id; + + bool vertexRangeCulling = + !gstate.isModeThrough() && gstate_c.submitType == SubmitType::DRAW; // neither hw nor sw spline/bezier. See #11692 + + // If we're not using GS culling, return a zero ID. + // Also, only use this for triangle primitives. + if (!vertexRangeCulling || !gstate_c.Supports(GPU_SUPPORTS_GS_CULLING) || (prim != GE_PRIM_TRIANGLES && prim != GE_PRIM_TRIANGLE_FAN && prim != GE_PRIM_TRIANGLE_STRIP)) { + *id_out = id; + return; + } + + id.SetBit(GS_BIT_ENABLED, true); + + if (gstate.isModeClear()) { + // No attribute bits. 
+ } else { + bool isModeThrough = gstate.isModeThrough(); + bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough; + + id.SetBit(GS_BIT_LMODE, lmode); + if (gstate.isTextureMapEnabled()) { + id.SetBit(GS_BIT_DO_TEXTURE); + } + } + + *id_out = id; +} diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 20a6cfda4850..ec25abb4d3f4 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -104,6 +104,17 @@ static inline FShaderBit operator +(FShaderBit bit, int i) { return FShaderBit((int)bit + i); } +// Some of these bits are straight from FShaderBit, since they essentially enable attributes directly. +enum GShaderBit : uint8_t { + GS_BIT_ENABLED = 0, // If not set, we don't use a geo shader. + GS_BIT_DO_TEXTURE = 1, // presence of texcoords + GS_BIT_LMODE = 2, // presence of specular color (regular color always present) +}; + +static inline GShaderBit operator +(GShaderBit bit, int i) { + return GShaderBit((int)bit + i); +} + struct ShaderID { ShaderID() { clear(); @@ -232,6 +243,31 @@ struct FShaderID : ShaderID { } }; +struct GShaderID : ShaderID { + GShaderID() : ShaderID() { + } + + explicit GShaderID(ShaderID &src) { + memcpy(d, src.d, sizeof(d)); + } + + bool Bit(GShaderBit bit) const { + return ShaderID::Bit((int)bit); + } + + int Bits(GShaderBit bit, int count) const { + return ShaderID::Bits((int)bit, count); + } + + void SetBit(GShaderBit bit, bool value = true) { + ShaderID::SetBit((int)bit, value); + } + + void SetBits(GShaderBit bit, int count, int value) { + ShaderID::SetBits((int)bit, count, value); + } +}; + namespace Draw { class Bugs; } @@ -244,3 +280,6 @@ std::string VertexShaderDesc(const VShaderID &id); struct ComputedPipelineState; void ComputeFragmentShaderID(FShaderID *id, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs); std::string FragmentShaderDesc(const FShaderID &id); + +void ComputeGeometryShaderID(GShaderID *id, const Draw::Bugs &bugs, int prim); +std::string 
GeometryShaderDesc(const GShaderID &id); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index a10cfe790e5e..e5d566284542 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -486,7 +486,8 @@ enum { // Free bit: 15 GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - // Free bits: 18-19 + GPU_SUPPORTS_GS_CULLING = FLAG_BIT(18), // Geometry shader + // Free bit: 19 GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22), diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 703ada14b4a7..2c50795472d4 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -89,8 +89,6 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw) decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); indexGen.Setup(decIndex); - - InitDeviceObjects(); } void DrawEngineVulkan::InitDeviceObjects() { @@ -114,6 +112,8 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[3].descriptorCount = 1; bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + if (gstate_c.Supports(GPU_SUPPORTS_GS_CULLING)) + bindings[3].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT; bindings[3].binding = DRAW_BINDING_DYNUBO_BASE; bindings[4].descriptorCount = 1; bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; @@ -581,6 +581,7 @@ void DrawEngineVulkan::DoFlush() { VulkanVertexShader *vshader = nullptr; VulkanFragmentShader *fshader = nullptr; + VulkanGeometryShader *gshader = nullptr; uint32_t ibOffset; uint32_t vbOffset; @@ -775,14 +776,14 @@ void DrawEngineVulkan::DoFlush() { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); } - shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, true, 
useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform if (!vshader) { // We're screwed. return; } _dbg_assert_msg_(vshader->UseHWTransform(), "Bad vshader"); - VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, true, 0); + VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, true, 0); if (!pipeline || !pipeline->pipeline) { // Already logged, let's bail out. return; @@ -905,9 +906,9 @@ void DrawEngineVulkan::DoFlush() { if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); } - shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform _dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader"); - VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, false, 0); + VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0); if (!pipeline || !pipeline->pipeline) { // Already logged, let's bail out. 
decodedVerts_ = 0; diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 50df67b13677..ebf50e33b51d 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -127,6 +127,9 @@ class DrawEngineVulkan : public DrawEngineCommon { DrawEngineVulkan(Draw::DrawContext *draw); virtual ~DrawEngineVulkan(); + // We reference feature flags, so this is called after construction. + void InitDeviceObjects(); + void SetShaderManager(ShaderManagerVulkan *shaderManager) { shaderManager_ = shaderManager; } @@ -196,7 +199,6 @@ class DrawEngineVulkan : public DrawEngineCommon { void ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, ShaderManagerVulkan *shaderManager, int prim, VulkanPipelineRasterStateKey &key, VulkanDynamicState &dynState); void BindShaderBlendTex(); - void InitDeviceObjects(); void DestroyDeviceObjects(); void DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf); diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 5bf6b0a2ac4e..aa31c93be465 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -53,6 +53,7 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), drawEngine_(draw) { gstate_c.featureFlags = CheckGPUFeatures(); + drawEngine_.InitDeviceObjects(); VulkanContext *vulkan = (VulkanContext *)gfxCtx->GetAPIContext(); diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 2ea530d25a68..f25b0cb26934 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -171,7 +171,7 @@ static std::string CutFromMain(std::string str) { static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache, VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key, - const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool 
useHwTransform, u32 variantBitmask) { + const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, VulkanGeometryShader *gs, bool useHwTransform, u32 variantBitmask) { VulkanPipeline *vulkanPipeline = new VulkanPipeline(); VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc; desc->pipelineCache = pipelineCache; @@ -254,6 +254,7 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, desc->fragmentShader = fs->GetModule(); desc->vertexShader = vs->GetModule(); + desc->geometryShader = gs ? gs->GetModule() : nullptr; VkPipelineInputAssemblyStateCreateInfo &inputAssembly = desc->inputAssembly; inputAssembly.flags = 0; @@ -301,6 +302,9 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, if (useBlendConstant) { pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT; } + if (gs) { + pipelineFlags |= PipelineFlags::USES_GEOMETRY_SHADER; + } if (dss.depthTestEnable || dss.stencilTestEnable) { pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } @@ -308,7 +312,7 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, return vulkanPipeline; } -VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *renderManager, VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) { +VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *renderManager, VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, VulkanGeometryShader *gs, bool useHwTransform, u32 variantBitmask) { if (!pipelineCache_) { VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO }; VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_); @@ -321,6 +325,7 @@ VulkanPipeline 
*PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * key.useHWTransform = useHwTransform; key.vShader = vs->GetModule(); key.fShader = fs->GetModule(); + key.gShader = gs ? gs->GetModule() : VK_NULL_HANDLE; key.vtxFmtId = useHwTransform ? decFmt->id : 0; auto iter = pipelines_.Get(key); @@ -334,7 +339,7 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * VulkanPipeline *pipeline = CreateVulkanPipeline( renderManager, pipelineCache_, layout, pipelineFlags, - rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask); + rasterKey, decFmt, vs, fs, gs, useHwTransform, variantBitmask); pipelines_.Insert(key, pipeline); // Don't return placeholder null pipelines. @@ -589,7 +594,13 @@ void PipelineManagerVulkan::SaveCache(FILE *file, bool saveRawPipelineCache, Sha return; VulkanVertexShader *vshader = shaderManager->GetVertexShaderFromModule(pkey.vShader->BlockUntilReady()); VulkanFragmentShader *fshader = shaderManager->GetFragmentShaderFromModule(pkey.fShader->BlockUntilReady()); - if (!vshader || !fshader) { + VulkanGeometryShader *gshader = nullptr; + if (pkey.gShader) { + gshader = shaderManager->GetGeometryShaderFromModule(pkey.gShader->BlockUntilReady()); + if (!gshader) + failed = true; + } + if (!vshader || !fshader || failed) { failed = true; return; } @@ -710,7 +721,7 @@ bool PipelineManagerVulkan::LoadCache(FILE *file, bool loadRawPipelineCache, Sha DecVtxFormat fmt; fmt.InitializeFromID(key.vtxFmtId); - VulkanPipeline *pipeline = GetOrCreatePipeline(rm, layout, key.raster, key.useHWTransform ? &fmt : 0, vs, fs, key.useHWTransform, key.variants); + VulkanPipeline *pipeline = GetOrCreatePipeline(rm, layout, key.raster, key.useHWTransform ? 
&fmt : 0, vs, fs, nullptr, key.useHWTransform, key.variants); if (!pipeline) { pipelineCreateFailCount += 1; } diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index e72f53a800c4..32e45c3faf7d 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -38,6 +38,7 @@ struct VulkanPipelineKey { VKRRenderPass *renderPass; Promise *vShader; Promise *fShader; + Promise *gShader; uint32_t vtxFmtId; bool useHWTransform; @@ -68,6 +69,7 @@ struct VulkanPipeline { class VulkanContext; class VulkanVertexShader; class VulkanFragmentShader; +class VulkanGeometryShader; class ShaderManagerVulkan; class DrawEngineCommon; @@ -77,7 +79,7 @@ class PipelineManagerVulkan { ~PipelineManagerVulkan(); // variantMask is only used when loading pipelines from cache. - VulkanPipeline *GetOrCreatePipeline(VulkanRenderManager *renderManager, VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantMask); + VulkanPipeline *GetOrCreatePipeline(VulkanRenderManager *renderManager, VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, VulkanGeometryShader *gs, bool useHwTransform, u32 variantMask); int GetNumPipelines() const { return (int)pipelines_.size(); } void Clear(); diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index bfb5547b6aa3..769c258993a8 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -163,8 +163,38 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons } } +VulkanGeometryShader::VulkanGeometryShader(VulkanContext *vulkan, GShaderID id, const char *code) + : vulkan_(vulkan), id_(id) { + source_ = code; + module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_GEOMETRY_BIT, 
source_.c_str(), new std::string(GeometryShaderDesc(id).c_str())); + if (!module_) { + failed_ = true; + } else { + VERBOSE_LOG(G3D, "Compiled geometry shader:\n%s\n", (const char *)code); + } +} + +VulkanGeometryShader::~VulkanGeometryShader() { + if (module_) { + VkShaderModule shaderModule = module_->BlockUntilReady(); + vulkan_->Delete().QueueDeleteShaderModule(shaderModule); + delete module_; + } +} + +std::string VulkanGeometryShader::GetShaderString(DebugShaderStringType type) const { + switch (type) { + case SHADER_STRING_SOURCE_CODE: + return source_; + case SHADER_STRING_SHORT_DESC: + return GeometryShaderDesc(id_); + default: + return "N/A"; + } +} + ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw) - : ShaderManagerCommon(draw), compat_(GLSL_VULKAN), fsCache_(16), vsCache_(16) { + : ShaderManagerCommon(draw), compat_(GLSL_VULKAN), fsCache_(16), vsCache_(16), gsCache_(16) { codeBuffer_ = new char[16384]; VulkanContext *vulkan = (VulkanContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT); uboAlignment_ = vulkan->GetPhysicalDeviceProperties().properties.limits.minUniformBufferOffsetAlignment; @@ -199,10 +229,15 @@ void ShaderManagerVulkan::Clear() { vsCache_.Iterate([&](const VShaderID &key, VulkanVertexShader *shader) { delete shader; }); + gsCache_.Iterate([&](const GShaderID &key, VulkanGeometryShader *shader) { + delete shader; + }); fsCache_.Clear(); vsCache_.Clear(); + gsCache_.Clear(); lastFSID_.set_invalid(); lastVSID_.set_invalid(); + lastGSID_.set_invalid(); gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } @@ -216,12 +251,14 @@ void ShaderManagerVulkan::DirtyShader() { // Forget the last shader ID lastFSID_.set_invalid(); lastVSID_.set_invalid(); + lastGSID_.set_invalid(); DirtyLastShader(); } void ShaderManagerVulkan::DirtyLastShader() { lastVShader_ = nullptr; lastFShader_ = nullptr; + lastGShader_ = nullptr; gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | 
DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } @@ -239,7 +276,7 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) { return dirty; } -void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) { +void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) { VShaderID VSID; if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) { gstate_c.Clean(DIRTY_VERTEXSHADER_STATE); @@ -256,14 +293,23 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader FSID = lastFSID_; } + GShaderID GSID; + if (gstate_c.IsDirty(DIRTY_GEOMETRYSHADER_STATE)) { + gstate_c.Clean(DIRTY_GEOMETRYSHADER_STATE); + ComputeGeometryShaderID(&GSID, draw_->GetBugs(), prim); + } else { + GSID = lastGSID_; + } + _dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE)); _dbg_assert_(FSID.Bit(FS_BIT_DO_TEXTURE) == VSID.Bit(VS_BIT_DO_TEXTURE)); _dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE)); // Just update uniforms if this is the same shader as last time. - if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) { + if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_ && GSID == lastGSID_) { *vshader = lastVShader_; *fshader = lastFShader_; + *gshader = lastGShader_; _dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was cached"); // Already all set, no need to look up in shader maps. 
return; @@ -281,11 +327,9 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader vs = new VulkanVertexShader(vulkan, VSID, codeBuffer_, useHWTransform); vsCache_.Insert(VSID, vs); } - lastVSID_ = VSID; VulkanFragmentShader *fs = fsCache_.Get(FSID); if (!fs) { - // uint32_t vendorID = vulkan->GetPhysicalDeviceProperties().properties.vendorID; // Fragment shader not in cache. Let's compile it. std::string genErrorString; uint64_t uniformMask = 0; // Not used @@ -296,13 +340,28 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader fsCache_.Insert(FSID, fs); } + VulkanGeometryShader *gs; + if (GSID.Bit(GS_BIT_ENABLED)) { + gs = gsCache_.Get(GSID); + if (!gs) { + // Geometry shader not in cache. Let's compile it. + // TODO + } + } else { + gs = nullptr; + } + + lastVSID_ = VSID; lastFSID_ = FSID; + lastGSID_ = GSID; lastVShader_ = vs; lastFShader_ = fs; + lastGShader_ = gs; *vshader = vs; *fshader = fs; + *gshader = gs; _dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was computed"); } @@ -327,6 +386,15 @@ std::vector ShaderManagerVulkan::DebugGetShaderIDs(DebugShaderType }); break; } + case SHADER_TYPE_GEOMETRY: + { + gsCache_.Iterate([&](const GShaderID &id, VulkanGeometryShader *shader) { + std::string idstr; + id.ToString(&idstr); + ids.push_back(idstr); + }); + break; + } default: break; } @@ -342,12 +410,16 @@ std::string ShaderManagerVulkan::DebugGetShaderString(std::string id, DebugShade VulkanVertexShader *vs = vsCache_.Get(VShaderID(shaderId)); return vs ? vs->GetShaderString(stringType) : ""; } - case SHADER_TYPE_FRAGMENT: { VulkanFragmentShader *fs = fsCache_.Get(FShaderID(shaderId)); return fs ? fs->GetShaderString(stringType) : ""; } + case SHADER_TYPE_GEOMETRY: + { + VulkanGeometryShader *gs = gsCache_.Get(GShaderID(shaderId)); + return gs ? 
gs->GetShaderString(stringType) : ""; + } default: return "N/A"; } @@ -375,6 +447,17 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM return fs; } +VulkanGeometryShader *ShaderManagerVulkan::GetGeometryShaderFromModule(VkShaderModule module) { + VulkanGeometryShader *gs = nullptr; + gsCache_.Iterate([&](const GShaderID &id, VulkanGeometryShader *shader) { + Promise *p = shader->GetModule(); + VkShaderModule m = p->BlockUntilReady(); + if (m == module) + gs = shader; + }); + return gs; +} + // Shader cache. // // We simply store the IDs of the shaders used during gameplay. On next startup of diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 6593a366f4da..4c89f91ef5f6 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -83,7 +83,27 @@ class VulkanVertexShader { VShaderID id_; }; -class VulkanPushBuffer; +class VulkanGeometryShader { +public: + VulkanGeometryShader(VulkanContext *vulkan, GShaderID id, const char *code); + ~VulkanGeometryShader(); + + const std::string &source() const { return source_; } + + bool Failed() const { return failed_; } + + std::string GetShaderString(DebugShaderStringType type) const; + Promise *GetModule() const { return module_; } + const GShaderID &GetID() { return id_; } + +protected: + Promise *module_ = nullptr; + + VulkanContext *vulkan_; + std::string source_; + bool failed_ = false; + GShaderID id_; +}; class ShaderManagerVulkan : public ShaderManagerCommon { public: @@ -93,19 +113,22 @@ class ShaderManagerVulkan : public ShaderManagerCommon { void DeviceLost(); void DeviceRestore(Draw::DrawContext *draw); - void GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat); + void GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, 
VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat); void ClearShaders(); void DirtyShader(); void DirtyLastShader() override; int GetNumVertexShaders() const { return (int)vsCache_.size(); } int GetNumFragmentShaders() const { return (int)fsCache_.size(); } + int GetNumGeometryShaders() const { return (int)gsCache_.size(); } // Used for saving/loading the cache. Don't need to be particularly fast. VulkanVertexShader *GetVertexShaderFromID(VShaderID id) { return vsCache_.Get(id); } VulkanFragmentShader *GetFragmentShaderFromID(FShaderID id) { return fsCache_.Get(id); } + VulkanGeometryShader *GetGeometryShaderFromID(GShaderID id) { return gsCache_.Get(id); } VulkanVertexShader *GetVertexShaderFromModule(VkShaderModule module); VulkanFragmentShader *GetFragmentShaderFromModule(VkShaderModule module); + VulkanGeometryShader *GetGeometryShaderFromModule(VkShaderModule module); std::vector DebugGetShaderIDs(DebugShaderType type); std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType); @@ -143,6 +166,9 @@ class ShaderManagerVulkan : public ShaderManagerCommon { typedef DenseHashMap VSCache; VSCache vsCache_; + typedef DenseHashMap GSCache; + GSCache gsCache_; + char *codeBuffer_; uint64_t uboAlignment_; @@ -153,7 +179,9 @@ class ShaderManagerVulkan : public ShaderManagerCommon { VulkanFragmentShader *lastFShader_ = nullptr; VulkanVertexShader *lastVShader_ = nullptr; + VulkanGeometryShader *lastGShader_ = nullptr; FShaderID lastFSID_; VShaderID lastVSID_; + GShaderID lastGSID_; }; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index e85f1d3adc2e..5dea5e0921e5 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -1156,7 +1156,7 @@ int ShaderListScreen::ListShaders(DebugShaderType shaderType, UI::LinearLayout * struct { DebugShaderType type; const char *name; } shaderTypes[] = { { SHADER_TYPE_VERTEX, "Vertex" }, { 
SHADER_TYPE_FRAGMENT, "Fragment" }, - // { SHADER_TYPE_GEOMETRY, "Geometry" }, + { SHADER_TYPE_GEOMETRY, "Geometry" }, { SHADER_TYPE_VERTEXLOADER, "VertexLoader" }, { SHADER_TYPE_PIPELINE, "Pipeline" }, { SHADER_TYPE_TEXTURE, "Texture" }, From fbdb2781688a997f630cd33bb7d1d5f70613dfc3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 1 Oct 2022 20:13:30 -0700 Subject: [PATCH 04/12] Vulkan: Update shader cache format for geo shaders. --- GPU/Vulkan/PipelineManagerVulkan.cpp | 5 ++++- GPU/Vulkan/ShaderManagerVulkan.cpp | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index f25b0cb26934..4b9ff1e4b398 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -544,6 +544,7 @@ struct StoredVulkanPipelineKey { VulkanPipelineRasterStateKey raster; VShaderID vShaderID; FShaderID fShaderID; + GShaderID gShaderID; uint32_t vtxFmtId; uint32_t variants; bool useHWTransform; // TODO: Still needed? @@ -609,6 +610,7 @@ void PipelineManagerVulkan::SaveCache(FILE *file, bool saveRawPipelineCache, Sha key.useHWTransform = pkey.useHWTransform; key.fShaderID = fshader->GetID(); key.vShaderID = vshader->GetID(); + key.gShaderID = gshader ? gshader->GetID() : GShaderID(); key.variants = value->GetVariantsBitmask(); if (key.useHWTransform) { // NOTE: This is not a vtype, but a decoded vertex format. 
@@ -713,6 +715,7 @@ bool PipelineManagerVulkan::LoadCache(FILE *file, bool loadRawPipelineCache, Sha } VulkanVertexShader *vs = shaderManager->GetVertexShaderFromID(key.vShaderID); VulkanFragmentShader *fs = shaderManager->GetFragmentShaderFromID(key.fShaderID); + VulkanGeometryShader *gs = shaderManager->GetGeometryShaderFromID(key.gShaderID); if (!vs || !fs) { failed = true; ERROR_LOG(G3D, "Failed to find vs or fs in of pipeline %d in cache", (int)i); @@ -721,7 +724,7 @@ bool PipelineManagerVulkan::LoadCache(FILE *file, bool loadRawPipelineCache, Sha DecVtxFormat fmt; fmt.InitializeFromID(key.vtxFmtId); - VulkanPipeline *pipeline = GetOrCreatePipeline(rm, layout, key.raster, key.useHWTransform ? &fmt : 0, vs, fs, nullptr, key.useHWTransform, key.variants); + VulkanPipeline *pipeline = GetOrCreatePipeline(rm, layout, key.raster, key.useHWTransform ? &fmt : 0, vs, fs, gs, key.useHWTransform, key.variants); if (!pipeline) { pipelineCreateFailCount += 1; } diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 769c258993a8..07d96033224e 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -467,7 +467,7 @@ VulkanGeometryShader *ShaderManagerVulkan::GetGeometryShaderFromModule(VkShaderM // instantaneous. #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 28 +#define CACHE_VERSION 29 struct VulkanCacheHeader { uint32_t magic; uint32_t version; @@ -475,6 +475,7 @@ struct VulkanCacheHeader { uint32_t reserved; int numVertexShaders; int numFragmentShaders; + int numGeometryShaders; }; bool ShaderManagerVulkan::LoadCache(FILE *f) { @@ -522,6 +523,15 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { fsCache_.Insert(id, fs); } + for (int i = 0; i < header.numGeometryShaders; i++) { + GShaderID id; + if (fread(&id, sizeof(id), 1, f) != 1) { + ERROR_LOG(G3D, "Vulkan shader cache truncated"); + break; + } + // TODO: Actually generate geometry shaders. 
+ } + NOTICE_LOG(G3D, "Loaded %d vertex and %d fragment shaders", header.numVertexShaders, header.numFragmentShaders); return true; } @@ -534,6 +544,7 @@ void ShaderManagerVulkan::SaveCache(FILE *f) { header.reserved = 0; header.numVertexShaders = (int)vsCache_.size(); header.numFragmentShaders = (int)fsCache_.size(); + header.numGeometryShaders = (int)gsCache_.size(); bool writeFailed = fwrite(&header, sizeof(header), 1, f) != 1; vsCache_.Iterate([&](const VShaderID &id, VulkanVertexShader *vs) { writeFailed = writeFailed || fwrite(&id, sizeof(id), 1, f) != 1; @@ -541,6 +552,9 @@ void ShaderManagerVulkan::SaveCache(FILE *f) { fsCache_.Iterate([&](const FShaderID &id, VulkanFragmentShader *fs) { writeFailed = writeFailed || fwrite(&id, sizeof(id), 1, f) != 1; }); + gsCache_.Iterate([&](const GShaderID &id, VulkanGeometryShader *gs) { + writeFailed = writeFailed || fwrite(&id, sizeof(id), 1, f) != 1; + }); if (writeFailed) { ERROR_LOG(G3D, "Failed to write Vulkan shader cache, disk full?"); } else { From cdee10fe86ddeffbfc643715586c12cec0d571e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 26 Oct 2021 09:56:14 +0200 Subject: [PATCH 05/12] Vulkan: Basic geoshader code generation. 
--- CMakeLists.txt | 2 + Common/GPU/Vulkan/VulkanContext.cpp | 2 - GPU/Common/GeometryShaderGenerator.cpp | 79 ++++++++++++++++++++++ GPU/Common/GeometryShaderGenerator.h | 5 ++ GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 ++ GPU/Vulkan/GPU_Vulkan.cpp | 8 +++ GPU/Vulkan/PipelineManagerVulkan.cpp | 2 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 19 +++++- UWP/GPU_UWP/GPU_UWP.vcxproj | 2 + UWP/GPU_UWP/GPU_UWP.vcxproj.filters | 2 + android/jni/Android.mk | 1 + libretro/Makefile.common | 1 + unittest/TestShaderGenerators.cpp | 91 ++++++++++++++++++++++++++ 14 files changed, 217 insertions(+), 5 deletions(-) create mode 100644 GPU/Common/GeometryShaderGenerator.cpp create mode 100644 GPU/Common/GeometryShaderGenerator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cec0f281d7e..52fb7e40c9b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1562,6 +1562,8 @@ set(GPU_SOURCES GPU/Common/FragmentShaderGenerator.h GPU/Common/VertexShaderGenerator.cpp GPU/Common/VertexShaderGenerator.h + GPU/Common/GeometryShaderGenerator.cpp + GPU/Common/GeometryShaderGenerator.h GPU/Common/FramebufferManagerCommon.cpp GPU/Common/FramebufferManagerCommon.h GPU/Common/GPUDebugInterface.cpp diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index 1379cb9f2116..c2140c5663d4 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -600,8 +600,6 @@ void VulkanContext::ChooseDevice(int physical_device) { deviceFeatures_.enabled.samplerAnisotropy = deviceFeatures_.available.samplerAnisotropy; deviceFeatures_.enabled.shaderClipDistance = deviceFeatures_.available.shaderClipDistance; deviceFeatures_.enabled.shaderCullDistance = deviceFeatures_.available.shaderCullDistance; - // For easy wireframe mode, someday. 
- deviceFeatures_.enabled.fillModeNonSolid = deviceFeatures_.available.fillModeNonSolid; deviceFeatures_.enabled.geometryShader = deviceFeatures_.available.geometryShader; GetDeviceLayerExtensionList(nullptr, device_extension_properties_); diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp new file mode 100644 index 000000000000..fdd64c807778 --- /dev/null +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -0,0 +1,79 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include +#include +#include + +#include "Common/StringUtils.h" +#include "Common/GPU/OpenGL/GLFeatures.h" +#include "Common/GPU/ShaderWriter.h" +#include "Common/GPU/thin3d.h" +#include "Core/Config.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUState.h" +#include "GPU/Common/ShaderId.h" +#include "GPU/Common/ShaderUniforms.h" +#include "GPU/Common/GeometryShaderGenerator.h" + +#undef WRITE + +#define WRITE(p, ...) 
p.F(__VA_ARGS__) + +bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) { + std::vector gl_exts; + if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { + if (gl_extensions.EXT_gpu_shader4) { + gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable"); + } + } + + ShaderWriter p(buffer, compat, ShaderStage::Geometry, gl_exts.data(), gl_exts.size()); + p.C("layout(triangles) in;\n"); + p.C("layout(triangle_strip, max_vertices = 3) out;\n"); + + std::vector varyings, outVaryings; + + if (id.Bit(GS_BIT_DO_TEXTURE)) { + varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }); + outVaryings.push_back(VaryingDef{ "vec3", "v_texcoordOut", Draw::SEM_TEXCOORD0, 0, "highp" }); + } + varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" }); + outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" }); + if (id.Bit(GS_BIT_LMODE)) { + varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" }); + outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" }); + } + varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" }); + outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" }); + + p.BeginGSMain(varyings, outVaryings); + + p.C(" for (int i = 0; i < gl_in.length(); i++) {\n"); + p.C(" gl_Position = gl_in[i].gl_Position;\n"); // copy attributes + for (size_t i = 0; i < varyings.size(); i++) { + VaryingDef &in = varyings[i]; + VaryingDef &out = outVaryings[i]; + p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name); + } + p.C(" EmitVertex();\n"); + p.C(" }\n"); + + p.EndGSMain(); + + return true; +} diff --git a/GPU/Common/GeometryShaderGenerator.h b/GPU/Common/GeometryShaderGenerator.h new file mode 100644 index 000000000000..9848ffde1838 --- /dev/null +++ 
b/GPU/Common/GeometryShaderGenerator.h @@ -0,0 +1,5 @@ +#pragma once + +#include "GPU/Common/ShaderId.h" + +bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString); diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 0480b490d7cc..808ce53635ed 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -340,6 +340,7 @@ + @@ -455,6 +456,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index bd28a45e9727..e110c20b8bf7 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -261,6 +261,9 @@ Debugger + + Common + @@ -512,6 +515,9 @@ Debugger + + Common + diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index aa31c93be465..79a2db00d63c 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -232,6 +232,14 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { features |= GPU_SUPPORTS_DEPTH_CLAMP; } + if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { + const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; + // Fall back to geometry shader culling. + if (enabledFeatures.geometryShader && !disabled && (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0) { + features |= GPU_SUPPORTS_GS_CULLING; + } + } + // These are VULKAN_4444_FORMAT and friends. 
uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16); uint32_t fmt1555 = draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16); diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 4b9ff1e4b398..3d7745b45254 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -716,7 +716,7 @@ bool PipelineManagerVulkan::LoadCache(FILE *file, bool loadRawPipelineCache, Sha VulkanVertexShader *vs = shaderManager->GetVertexShaderFromID(key.vShaderID); VulkanFragmentShader *fs = shaderManager->GetFragmentShaderFromID(key.fShaderID); VulkanGeometryShader *gs = shaderManager->GetGeometryShaderFromID(key.gShaderID); - if (!vs || !fs) { + if (!vs || !fs || (!gs && key.gShaderID.Bit(GS_BIT_ENABLED))) { failed = true; ERROR_LOG(G3D, "Failed to find vs or fs in of pipeline %d in cache", (int)i); continue; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 07d96033224e..00649a6be184 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -38,6 +38,7 @@ #include "GPU/ge_constants.h" #include "GPU/Common/FragmentShaderGenerator.h" #include "GPU/Common/VertexShaderGenerator.h" +#include "GPU/Common/GeometryShaderGenerator.h" #include "GPU/Vulkan/ShaderManagerVulkan.h" #include "GPU/Vulkan/DrawEngineVulkan.h" #include "GPU/Vulkan/FramebufferManagerVulkan.h" @@ -305,6 +306,11 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader _dbg_assert_(FSID.Bit(FS_BIT_DO_TEXTURE) == VSID.Bit(VS_BIT_DO_TEXTURE)); _dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE)); + if (GSID.Bit(GS_BIT_ENABLED)) { + _dbg_assert_(GSID.Bit(GS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE)); + _dbg_assert_(GSID.Bit(GS_BIT_DO_TEXTURE) == VSID.Bit(VS_BIT_DO_TEXTURE)); + } + // Just update uniforms if this is the same shader as last time. 
if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_ && GSID == lastGSID_) { *vshader = lastVShader_; @@ -345,7 +351,11 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader gs = gsCache_.Get(GSID); if (!gs) { // Geometry shader not in cache. Let's compile it. - // TODO + std::string genErrorString; + bool success = GenerateGeometryShader(GSID, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString); + _assert_msg_(success, "GS gen error: %s", genErrorString.c_str()); + gs = new VulkanGeometryShader(vulkan, GSID, codeBuffer_); + gsCache_.Insert(GSID, gs); } } else { gs = nullptr; @@ -529,7 +539,12 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { ERROR_LOG(G3D, "Vulkan shader cache truncated"); break; } - // TODO: Actually generate geometry shaders. + std::string genErrorString; + if (!GenerateGeometryShader(id, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString)) { + return false; + } + VulkanGeometryShader *gs = new VulkanGeometryShader(vulkan, id, codeBuffer_); + gsCache_.Insert(id, gs); } NOTICE_LOG(G3D, "Loaded %d vertex and %d fragment shaders", header.numVertexShaders, header.numFragmentShaders); diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj b/UWP/GPU_UWP/GPU_UWP.vcxproj index 87aac4d38b23..05b783973699 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj @@ -385,6 +385,7 @@ + @@ -446,6 +447,7 @@ + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters index dbce632981d4..2296b16a16e1 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters @@ -55,6 +55,7 @@ + @@ -116,6 +117,7 @@ + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 2f86711855c3..42cae5cb283e 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -350,6 +350,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/Common/PostShader.cpp \ $(SRC)/GPU/Common/ShaderUniforms.cpp \ $(SRC)/GPU/Common/VertexShaderGenerator.cpp \ 
+ $(SRC)/GPU/Common/GeometryShaderGenerator.cpp \ $(SRC)/GPU/Debugger/Breakpoints.cpp \ $(SRC)/GPU/Debugger/Debugger.cpp \ $(SRC)/GPU/Debugger/GECommandTable.cpp \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index e7dd40be3531..8eed2779326b 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -342,6 +342,7 @@ SOURCES_CXX += \ $(GPUDIR)/Debugger/Stepping.cpp \ $(GPUDIR)/Common/FragmentShaderGenerator.cpp \ $(GPUDIR)/Common/VertexShaderGenerator.cpp \ + $(GPUDIR)/Common/GeometryShaderGenerator.cpp \ $(GPUDIR)/Common/TextureCacheCommon.cpp \ $(GPUDIR)/Common/TextureScalerCommon.cpp \ $(GPUDIR)/Common/SoftwareTransformCommon.cpp \ diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 810de85b9e5e..346fadf6434b 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -12,6 +12,7 @@ #include "GPU/Common/FragmentShaderGenerator.h" #include "GPU/Common/VertexShaderGenerator.h" +#include "GPU/Common/GeometryShaderGenerator.h" #include "GPU/Common/ReinterpretFramebuffer.h" #include "GPU/Common/StencilCommon.h" #include "GPU/Common/DepalettizeShaderCommon.h" @@ -91,6 +92,32 @@ bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs } } +bool GenerateGShader(GShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) { + errorString->clear(); + + switch (lang) { + case ShaderLanguage::GLSL_VULKAN: + { + ShaderLanguageDesc compat(ShaderLanguage::GLSL_VULKAN); + return GenerateGeometryShader(id, buffer, compat, bugs, errorString); + } + /* + case ShaderLanguage::GLSL_3xx: + { + ShaderLanguageDesc compat(ShaderLanguage::GLSL_3xx); + return GenerateGeometryShader(id, buffer, compat, bugs, errorString); + } + case ShaderLanguage::HLSL_D3D11: + { + ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D11); + return GenerateGeometryShader(id, buffer, compat, bugs, errorString); + } + */ + default: + return false; + } +} + 
static VkShaderStageFlagBits StageToVulkan(ShaderStage stage) { switch (stage) { case ShaderStage::Vertex: return VK_SHADER_STAGE_VERTEX_BIT; @@ -369,6 +396,9 @@ bool TestVertexShaders() { if (!id.Bit(VS_BIT_USE_HW_TRANSFORM)) { id.SetBit(VS_BIT_ENABLE_BONES, 0); } + if (id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { + continue; + } bool generateSuccess[numLanguages]{}; std::string genErrorString[numLanguages]; @@ -463,6 +493,63 @@ bool TestFragmentShaders() { return true; } + +bool TestGeometryShaders() { + char *buffer[numLanguages]; + + for (int i = 0; i < numLanguages; i++) { + buffer[i] = new char[65536]; + } + GMRng rng; + int successes = 0; + int count = 30; + + Draw::Bugs bugs; + + // Generate a bunch of random fragment shader IDs, try to generate shader source. + // Then compile it and check that it's ok. + for (int i = 0; i < count; i++) { + uint32_t bottom = i << 1; + GShaderID id; + id.d[0] = bottom; + id.d[1] = 0; + + id.SetBit(GS_BIT_ENABLED, true); + + bool generateSuccess[numLanguages]{}; + std::string genErrorString[numLanguages]; + + for (int j = 0; j < numLanguages; j++) { + generateSuccess[j] = GenerateGShader(id, buffer[j], languages[j], bugs, &genErrorString[j]); + if (!genErrorString[j].empty()) { + printf("%s\n", genErrorString[j].c_str()); + } + // We ignore the contents of the error string here, not even gonna try to compile if it errors. + } + + // Now that we have the strings ready for easy comparison (buffer,4 in the watch window), + // let's try to compile them. 
+ for (int j = 0; j < numLanguages; j++) { + if (generateSuccess[j]) { + std::string errorMessage; + if (!TestCompileShader(buffer[j], languages[j], ShaderStage::Geometry, &errorMessage)) { + printf("Error compiling geometry shader:\n\n%s\n\n%s\n", LineNumberString(buffer[j]).c_str(), errorMessage.c_str()); + return false; + } + successes++; + } + } + } + + printf("%d/%d geometry shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages); + + for (int i = 0; i < numLanguages; i++) { + delete[] buffer[i]; + } + return true; +} + + bool TestShaderGenerators() { #if PPSSPP_PLATFORM(WINDOWS) LoadD3D11(); @@ -476,6 +563,10 @@ bool TestShaderGenerators() { return false; } + if (!TestGeometryShaders()) { + return false; + } + if (!TestReinterpretShaders()) { return false; } From ac248338be64161eb47ac88ce3892e9538731112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 27 Oct 2021 09:28:07 +0200 Subject: [PATCH 06/12] Vulkan: Cull in geoshader, hack to on for now. --- GPU/Common/GeometryShaderGenerator.cpp | 54 +++++++++++++++++++++++++- GPU/Common/VertexShaderGenerator.cpp | 2 +- GPU/Vulkan/GPU_Vulkan.cpp | 6 +++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp index fdd64c807778..5ec7875e5cd9 100644 --- a/GPU/Common/GeometryShaderGenerator.cpp +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -34,6 +34,10 @@ #define WRITE(p, ...) p.F(__VA_ARGS__) +// TODO: Could support VK_NV_geometry_shader_passthrough, though the hardware that supports +// it is already pretty fast at geometry shaders.. 
+ + bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) { std::vector gl_exts; if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { @@ -46,6 +50,13 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C("layout(triangles) in;\n"); p.C("layout(triangle_strip, max_vertices = 3) out;\n"); + if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, "\n"); + WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr); + } else if (compat.shaderLanguage == HLSL_D3D11) { + WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr); + } + std::vector varyings, outVaryings; if (id.Bit(GS_BIT_DO_TEXTURE)) { @@ -63,13 +74,52 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.BeginGSMain(varyings, outVaryings); - p.C(" for (int i = 0; i < gl_in.length(); i++) {\n"); - p.C(" gl_Position = gl_in[i].gl_Position;\n"); // copy attributes + // Apply culling + p.C(" bool anyInside = false;\n"); // TODO: 3 or gl_in.length()? which will be faster? + + p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? + p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); + p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); + p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); + // Vertex range culling doesn't happen when Z clips, note sign of w is important. + p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y"; + p.F(" if ((%s) || (%s)) {\n", outMin, outMax); + p.C(" return;\n"); // Cull! 
+ p.C(" }\n"); + p.C(" }\n"); + p.C(" if (u_cullRangeMin.w <= 0.0) {\n"); + p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n"); + p.C(" return;\n"); // Cull! + p.C(" }\n"); + p.C(" } else {\n"); + p.C(" if (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n"); + p.C(" }\n"); + p.C(" } // for\n"); + + // Cull any triangle fully outside in the same direction when depth clamp enabled. + // Basically simulate cull distances. + p.C(" if (!anyInside) { return; }\n"); + + const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]"; + + p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? + p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); + p.C(" gl_Position = outPos;\n"); + p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); + p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); + // TODO: Not rectangles... + // TODO: Check feature flag. + //p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0); + for (size_t i = 0; i < varyings.size(); i++) { VaryingDef &in = varyings[i]; VaryingDef &out = outVaryings[i]; p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name); } + // Debug - null the red channel + p.C(" if (i == 0) v_color0Out.x = 0.0;\n"); p.C(" EmitVertex();\n"); p.C(" }\n"); diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 797a6e0e50fd..d6f356c6d4dd 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1299,7 +1299,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y"; const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y"; - WRITE(p, " if (%s || %s) {\n", outMin, outMax); + WRITE(p, " if ((%s) || (%s)) {\n", outMin, 
outMax); WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n"); WRITE(p, " }\n"); WRITE(p, " }\n"); diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 79a2db00d63c..67ed9ae2995e 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -232,6 +232,11 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { features |= GPU_SUPPORTS_DEPTH_CLAMP; } + // Force geo shader culling for debugging. +#if 1 + features |= GPU_SUPPORTS_GS_CULLING; + features &= ~GPU_SUPPORTS_VS_RANGE_CULLING; +#else if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; // Fall back to geometry shader culling. @@ -239,6 +244,7 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { features |= GPU_SUPPORTS_GS_CULLING; } } +#endif // These are VULKAN_4444_FORMAT and friends. uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16); From bfaa304461e99b1b088db08ecec4f78d10f6adc2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 27 Oct 2021 21:22:45 -0700 Subject: [PATCH 07/12] Vulkan: Correct geometry shader culling. --- GPU/Common/GeometryShaderGenerator.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp index 5ec7875e5cd9..2c2906d10065 100644 --- a/GPU/Common/GeometryShaderGenerator.cpp +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -75,7 +75,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.BeginGSMain(varyings, outVaryings); // Apply culling - p.C(" bool anyInside = false;\n"); // TODO: 3 or gl_in.length()? which will be faster? + p.C(" bool anyInside = false;\n"); p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? 
p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); @@ -91,16 +91,20 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" }\n"); p.C(" if (u_cullRangeMin.w <= 0.0) {\n"); p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n"); - p.C(" return;\n"); // Cull! + // When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.) + p.C(" return;\n"); p.C(" }\n"); p.C(" } else {\n"); - p.C(" if (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n"); + p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n"); + p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n"); p.C(" }\n"); p.C(" } // for\n"); // Cull any triangle fully outside in the same direction when depth clamp enabled. // Basically simulate cull distances. - p.C(" if (!anyInside) { return; }\n"); + p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n"); + p.C(" return;\n"); + p.C(" }\n"); const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]"; @@ -119,7 +123,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name); } // Debug - null the red channel - p.C(" if (i == 0) v_color0Out.x = 0.0;\n"); + //p.C(" if (i == 0) v_color0Out.x = 0.0;\n"); p.C(" EmitVertex();\n"); p.C(" }\n"); From 2ce0cda333c945de63ad32179026b42fc9307dc8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 1 Oct 2022 20:39:22 -0700 Subject: [PATCH 08/12] Vulkan: Enable geo shader for culling. The compat setting was really for some previously buggy cases that couldn't work without cull.
--- GPU/Vulkan/GPU_Vulkan.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 67ed9ae2995e..6376fc062089 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -232,19 +232,15 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { features |= GPU_SUPPORTS_DEPTH_CLAMP; } - // Force geo shader culling for debugging. -#if 1 - features |= GPU_SUPPORTS_GS_CULLING; - features &= ~GPU_SUPPORTS_VS_RANGE_CULLING; -#else - if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { - const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; - // Fall back to geometry shader culling. - if (enabledFeatures.geometryShader && !disabled && (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0) { + // Fall back to geometry shader culling if we can't do vertex range culling. + if (enabledFeatures.geometryShader) { + const bool vertexSupported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; + if (!vertexSupported || (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0) { + // Switch to culling via the geometry shader if not fully supported in vertex. features |= GPU_SUPPORTS_GS_CULLING; + features &= ~GPU_SUPPORTS_VS_RANGE_CULLING; } } -#endif // These are VULKAN_4444_FORMAT and friends. uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16); From 36eb0d9ad5994fe4e4ad9f02e4732efc397fc631 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 1 Oct 2022 20:42:00 -0700 Subject: [PATCH 09/12] Vulkan: Use geo clip distance only where supported. It might be supported without cull or GS. Otherwise we may need to clip the triangles manually. 
--- GPU/Common/GeometryShaderGenerator.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp index 2c2906d10065..0121d7748a0a 100644 --- a/GPU/Common/GeometryShaderGenerator.cpp +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -114,8 +114,9 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); // TODO: Not rectangles... - // TODO: Check feature flag. - //p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0); + if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { + p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0); + } for (size_t i = 0; i < varyings.size(); i++) { VaryingDef &in = varyings[i]; From 8df956b03639acbaf0e14b483295cdb7c23cac05 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 2 Oct 2022 07:32:31 -0700 Subject: [PATCH 10/12] Vulkan: Block geometry shaders on older Mali. They're too slow to be usable. --- Common/GPU/Vulkan/thin3d_vulkan.cpp | 5 +++++ Common/GPU/thin3d.h | 3 +++ GPU/Vulkan/GPU_Vulkan.cpp | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 413057b28846..06e026d35cda 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -846,6 +846,11 @@ VKContext::VKContext(VulkanContext *vulkan) if (majorVersion >= 32) { bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661 } + + // Older ARM devices have very slow geometry shaders, not worth using. At least before 15. + if (majorVersion <= 15) { + bugs_.Infest(Bugs::GEOMETRY_SHADERS_SLOW); + } } // Limited, through input attachments and self-dependencies. 
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index f0c562b2f349..c04b7119cd25 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -333,11 +333,14 @@ class Bugs { RASPBERRY_SHADER_COMP_HANG = 8, MALI_CONSTANT_LOAD_BUG = 9, SUBPASS_FEEDBACK_BROKEN = 10, + GEOMETRY_SHADERS_SLOW = 11, MAX_BUG, }; protected: uint32_t flags_ = 0; + + static_assert(sizeof(flags_) * 8 > MAX_BUG, "Ran out of space for bugs."); }; class RefCountedObject { diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 6376fc062089..6d91a3b20ec0 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -233,7 +233,7 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { } // Fall back to geometry shader culling if we can't do vertex range culling. - if (enabledFeatures.geometryShader) { + if (enabledFeatures.geometryShader && !draw_->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW)) { const bool vertexSupported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; if (!vertexSupported || (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0) { // Switch to culling via the geometry shader if not fully supported in vertex. From 2832edcc3791f05227e4fa534aaab7e69021d14c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 2 Oct 2022 07:41:36 -0700 Subject: [PATCH 11/12] Vulkan: Allow configuring geometry shaders on/off. 
--- Core/Config.cpp | 1 + Core/Config.h | 1 + GPU/Vulkan/GPU_Vulkan.cpp | 5 +++-- UI/GameSettingsScreen.cpp | 9 +++++++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 45e396c9b76b..dee26c37937a 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -874,6 +874,7 @@ static ConfigSetting graphicsSettings[] = { #endif ConfigSetting("CameraDevice", &g_Config.sCameraDevice, "", true, false), ConfigSetting("VendorBugChecksEnabled", &g_Config.bVendorBugChecksEnabled, true, false, false), + ConfigSetting("UseGeometryShader", &g_Config.bUseGeometryShader, true, true, true), ReportedConfigSetting("RenderingMode", &g_Config.iRenderingMode, 1, true, true), ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true), ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true), diff --git a/Core/Config.h b/Core/Config.h index 8a7f519621f7..65eaf74099bf 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -164,6 +164,7 @@ struct Config { bool bHardwareTransform; // only used in the GLES backend bool bSoftwareSkinning; // may speed up some games bool bVendorBugChecksEnabled; + bool bUseGeometryShader; int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering int iTexFiltering; // 1 = auto , 2 = nearest , 3 = linear , 4 = auto max quality diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 6d91a3b20ec0..f285af82d0b5 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -233,9 +233,10 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { } // Fall back to geometry shader culling if we can't do vertex range culling. 
- if (enabledFeatures.geometryShader && !draw_->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW)) { + if (enabledFeatures.geometryShader) { + const bool useGeometry = g_Config.bUseGeometryShader && !draw_->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW); const bool vertexSupported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; - if (!vertexSupported || (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0) { + if (useGeometry && (!vertexSupported || (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0)) { // Switch to culling via the geometry shader if not fully supported in vertex. features |= GPU_SUPPORTS_GS_CULLING; features &= ~GPU_SUPPORTS_VS_RANGE_CULLING; diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 0c1c0d009eee..876db2773ef7 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -473,6 +473,15 @@ void GameSettingsScreen::CreateViews() { inflightChoice->OnChoice.Handle(this, &GameSettingsScreen::OnInflightFramesChoice); } + if (GetGPUBackend() == GPUBackend::VULKAN) { + const bool usable = !draw->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW); + const bool vertexSupported = draw->GetDeviceCaps().clipDistanceSupported && draw->GetDeviceCaps().cullDistanceSupported; + if (usable && !vertexSupported) { + CheckBox *geometryCulling = graphicsSettings->Add(new CheckBox(&g_Config.bUseGeometryShader, gr->T("Geometry shader culling"))); + geometryCulling->SetDisabledPtr(&g_Config.bSoftwareRendering); + } + } + if (deviceType != DEVICE_TYPE_VR) { CheckBox *hwTransform = graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTransform, gr->T("Hardware Transform"))); hwTransform->SetDisabledPtr(&g_Config.bSoftwareRendering); From 4df7a8f3578a77f029879e25960686fdeaad96b3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 2 Oct 2022 07:43:35 -0700 Subject: [PATCH 12/12] Vulkan: Cleanup unused geometry shader vars. 
Without clipping, these aren't used (but could be in the future with manual clipping.) --- GPU/Common/GeometryShaderGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp index 0121d7748a0a..b701bb547c73 100644 --- a/GPU/Common/GeometryShaderGenerator.cpp +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -111,10 +111,10 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); p.C(" gl_Position = outPos;\n"); - p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); - p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); // TODO: Not rectangles... if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { + p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); + p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clip0); }