From 025af5f0f7f4de7a67d9269c217916daed11a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 19 Feb 2022 20:40:27 +0100 Subject: [PATCH] Use subpass dependencies to implement shader framebuffer read in Vulkan. --- Common/GPU/Vulkan/VulkanBarrier.cpp | 2 +- Common/GPU/Vulkan/VulkanBarrier.h | 2 + Common/GPU/Vulkan/VulkanQueueRunner.cpp | 91 ++++++++++++++++++++--- Common/GPU/Vulkan/VulkanQueueRunner.h | 20 +++-- Common/GPU/Vulkan/VulkanRenderManager.cpp | 12 ++- Common/GPU/Vulkan/VulkanRenderManager.h | 4 +- Common/GPU/Vulkan/thin3d_vulkan.cpp | 62 ++++++++------- Common/GPU/thin3d.h | 4 + GPU/Common/DrawEngineCommon.cpp | 6 +- GPU/Common/DrawEngineCommon.h | 8 +- GPU/Common/FragmentShaderGenerator.cpp | 31 +++++--- GPU/Common/FragmentShaderGenerator.h | 2 +- GPU/D3D11/StateMappingD3D11.cpp | 9 ++- GPU/Directx9/DrawEngineDX9.h | 2 + GPU/Directx9/StateMappingDX9.cpp | 13 ++-- GPU/GLES/StateMappingGLES.cpp | 10 ++- GPU/GPUState.h | 3 +- GPU/Vulkan/DrawEngineVulkan.cpp | 20 +++-- GPU/Vulkan/DrawEngineVulkan.h | 3 +- GPU/Vulkan/FramebufferManagerVulkan.h | 2 +- GPU/Vulkan/GPU_Vulkan.cpp | 3 + GPU/Vulkan/PipelineManagerVulkan.cpp | 17 +++-- GPU/Vulkan/PipelineManagerVulkan.h | 9 ++- GPU/Vulkan/StateMappingVulkan.cpp | 10 ++- 24 files changed, 248 insertions(+), 97 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanBarrier.cpp b/Common/GPU/Vulkan/VulkanBarrier.cpp index 125d51ea6de9..e4f2d0908933 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.cpp +++ b/Common/GPU/Vulkan/VulkanBarrier.cpp @@ -4,7 +4,7 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) { if (!imageBarriers_.empty()) { - vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); + vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); } imageBarriers_.clear(); srcStageMask_ = 0; diff --git 
a/Common/GPU/Vulkan/VulkanBarrier.h b/Common/GPU/Vulkan/VulkanBarrier.h index eb949dd2f047..0d5754b3f4f4 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.h +++ b/Common/GPU/Vulkan/VulkanBarrier.h @@ -21,6 +21,7 @@ class VulkanBarrier { ) { srcStageMask_ |= srcStageMask; dstStageMask_ |= dstStageMask; + dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT; VkImageMemoryBarrier imageBarrier; imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -112,4 +113,5 @@ class VulkanBarrier { VkPipelineStageFlags srcStageMask_ = 0; VkPipelineStageFlags dstStageMask_ = 0; std::vector imageBarriers_; + VkDependencyFlags dependencyFlags_ = 0; }; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 10a88e623a80..a3278808860d 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) { if (a == b) { // Trivial merging case. return a; + } else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) { + return RP_TYPE_COLOR_DEPTH_INPUT; + } else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) { + return RP_TYPE_COLOR_DEPTH_INPUT; } - // More cases to be added later. return a; } @@ -155,7 +158,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) { return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning } +// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827 +// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies + VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) { + bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT; + VkAttachmentDescription attachments[2] = {}; attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? 
vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; @@ -179,7 +187,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp VkAttachmentReference color_reference{}; color_reference.attachment = 0; - color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference depth_reference{}; depth_reference.attachment = 1; @@ -188,8 +196,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.flags = 0; - subpass.inputAttachmentCount = 0; - subpass.pInputAttachments = nullptr; + if (selfDependency) { + subpass.inputAttachmentCount = 1; + subpass.pInputAttachments = &color_reference; + } else { + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + } subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &color_reference; subpass.pResolveAttachments = nullptr; @@ -198,22 +211,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp subpass.pPreserveAttachments = nullptr; // Not sure if this is really necessary. 
- VkSubpassDependency dep{}; - dep.srcSubpass = VK_SUBPASS_EXTERNAL; - dep.dstSubpass = 0; - dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.srcAccessMask = 0; - dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkSubpassDependency deps[2]{}; + size_t numDeps = 0; VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO }; rp.attachmentCount = 2; rp.pAttachments = attachments; rp.subpassCount = 1; rp.pSubpasses = &subpass; + if (rpType == RP_TYPE_BACKBUFFER) { + deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; + deps[numDeps].dstSubpass = 0; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].srcAccessMask = 0; + deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + numDeps++; rp.dependencyCount = 1; - rp.pDependencies = &dep; + } + + if (selfDependency) { + deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + deps[numDeps].srcSubpass = 0; + deps[numDeps].dstSubpass = 0; + numDeps++; + } + + if (numDeps > 0) { + rp.dependencyCount = (u32)numDeps; + rp.pDependencies = deps; } VkRenderPass pass; @@ -246,6 +277,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) { return pass; } +// Must match the subpass self-dependency declared above. 
+void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) { + if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) { + VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + recordBarrier->TransitionImage( + img.image, + 0, + 1, + aspect, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + srcAccessMask, + dstAccessMask, + srcStageMask, + dstStageMask + ); + } else { + _assert_msg_(false, "Depth self-dependencies not yet supported"); + } +} + void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { // Optimizes renderpasses, then sequences them. // Planned optimizations: @@ -817,6 +872,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) { case VKRRenderCommand::REMOVED: INFO_LOG(G3D, " (Removed)"); break; + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + INFO_LOG(G3D, " SelfBarrier()"); + break; case VKRRenderCommand::BIND_GRAPHICS_PIPELINE: INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline); break; @@ -1235,6 +1293,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c break; } + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + { + _assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT); + VulkanBarrier barrier; + SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier); + barrier.Flush(cmd); + break; + } + case VKRRenderCommand::PUSH_CONSTANTS: vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data); break; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index adb9e7d96e99..8f0533331a40 100644 --- 
a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -20,7 +20,6 @@ struct VKRImage; enum { QUEUE_HACK_MGS2_ACID = 1, QUEUE_HACK_SONIC = 2, - // Killzone PR = 4. QUEUE_HACK_RENDERPASS_MERGE = 8, }; @@ -36,20 +35,24 @@ enum class VKRRenderCommand : uint8_t { DRAW, DRAW_INDEXED, PUSH_CONSTANTS, + SELF_DEPENDENCY_BARRIER, NUM_RENDER_COMMANDS, }; -enum PipelineFlags { - PIPELINE_FLAG_NONE = 0, - PIPELINE_FLAG_USES_LINES = (1 << 2), - PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3), - PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. +enum class PipelineFlags { + NONE = 0, + USES_LINES = (1 << 2), + USES_BLEND_CONSTANT = (1 << 3), + USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. + USES_INPUT_ATTACHMENT = (1 << 5), }; +ENUM_CLASS_BITOPS(PipelineFlags); // Pipelines need to be created for the right type of render pass. enum RenderPassType { RP_TYPE_BACKBUFFER, RP_TYPE_COLOR_DEPTH, + RP_TYPE_COLOR_DEPTH_INPUT, // Later will add pure-color render passes. RP_TYPE_COUNT, }; @@ -168,7 +171,6 @@ struct VKRStep { union { struct { VKRFramebuffer *framebuffer; - // TODO: Look these up through renderPass? VKRRenderPassLoadAction colorLoad; VKRRenderPassLoadAction depthLoad; VKRRenderPassLoadAction stencilLoad; @@ -183,7 +185,7 @@ struct VKRStep { int numReads; VkImageLayout finalColorLayout; VkImageLayout finalDepthStencilLayout; - u32 pipelineFlags; + PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT VkRect2D renderArea; // Render pass type. Deduced after finishing recording the pass, from the used pipelines. // NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization. 
@@ -324,6 +326,8 @@ class VulkanQueueRunner { static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + VulkanContext *vulkan_; VkFramebuffer backbuffer_ = VK_NULL_HANDLE; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 52f0b8fb27d3..577395f86c10 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -221,7 +221,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int // Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers. ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (color) { - ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; } else { ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } @@ -767,6 +767,9 @@ void VulkanRenderManager::EndCurRenderStep() { curRenderStep_->render.pipelineFlags = curPipelineFlags_; if (!curRenderStep_->render.framebuffer) { rpType = RP_TYPE_BACKBUFFER; + } else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) { + // Not allowed on backbuffers. + rpType = RP_TYPE_COLOR_DEPTH_INPUT; } VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key); @@ -796,7 +799,12 @@ void VulkanRenderManager::EndCurRenderStep() { // We no longer have a current render step. 
curRenderStep_ = nullptr; - curPipelineFlags_ = 0; + curPipelineFlags_ = (PipelineFlags)0; +} + +void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) { + _dbg_assert_(curRenderStep_); + curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER }); } void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 4cc7aafdfccc..b29b8bdcbd6f 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -236,6 +236,8 @@ class VulkanRenderManager { // as the other backends, even though there's no actual binding happening here. VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment); + void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits); + bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); @@ -544,7 +546,7 @@ class VulkanRenderManager { VKRStep *curRenderStep_ = nullptr; bool curStepHasViewport_ = false; bool curStepHasScissor_ = false; - u32 curPipelineFlags_ = 0; + PipelineFlags curPipelineFlags_{}; BoundingRect curRenderArea_; std::vector steps_; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index bb206fd1cb62..c7c2840ae719 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -401,9 +401,10 @@ class 
VKContext : public DrawContext { // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; Framebuffer *GetCurrentRenderTarget() override { - return curFramebuffer_; + return (Framebuffer *)curFramebuffer_.ptr; } void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; + void BindCurrentFramebufferForColorInput() override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -472,27 +473,7 @@ class VKContext : public DrawContext { std::vector GetFeatureList() const override; std::vector GetExtensionList() const override; - uint64_t GetNativeObject(NativeObject obj, void *srcObject) override { - switch (obj) { - case NativeObject::CONTEXT: - return (uint64_t)vulkan_; - case NativeObject::INIT_COMMANDBUFFER: - return (uint64_t)renderManager_.GetInitCmd(); - case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: - return (uint64_t)boundImageView_[0]; - case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: - return (uint64_t)boundImageView_[1]; - case NativeObject::RENDER_MANAGER: - return (uint64_t)(uintptr_t)&renderManager_; - case NativeObject::NULL_IMAGEVIEW: - return (uint64_t)GetNullTexture()->GetImageView(); - case NativeObject::TEXTURE_VIEW: - return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); - default: - Crash(); - return 0; - } - } + uint64_t GetNativeObject(NativeObject obj, void *srcObject) override; void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; @@ -521,7 +502,7 @@ class VKContext : public DrawContext { VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE; VkPipelineCache pipelineCache_ = VK_NULL_HANDLE; - AutoRef curFramebuffer_; + AutoRef curFramebuffer_; VkDevice device_; VkQueue queue_; @@ -799,6 +780,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) 
caps_.textureNPOTFullySupported = true; caps_.fragmentShaderDepthWriteSupported = true; caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.logicOp != 0; + caps_.framebufferFetchSupported = true; // Limited, through input attachments and self-dependencies. auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties; switch (deviceProps.vendorID) { @@ -1049,12 +1031,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil; VKRasterState *raster = (VKRasterState *)desc.raster; - u32 pipelineFlags = 0; + PipelineFlags pipelineFlags = (PipelineFlags)0; if (depth->info.depthTestEnable || depth->info.stencilTestEnable) { - pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } - VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag); + VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? 
desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag); VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc; @@ -1570,6 +1552,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment); } +void VKContext::BindCurrentFramebufferForColorInput() { + renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT); +} + void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { VKFramebuffer *fb = (VKFramebuffer *)fbo; if (fb) { @@ -1610,4 +1596,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe } } +uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) { + switch (obj) { + case NativeObject::CONTEXT: + return (uint64_t)vulkan_; + case NativeObject::INIT_COMMANDBUFFER: + return (uint64_t)renderManager_.GetInitCmd(); + case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: + return (uint64_t)boundImageView_[0]; + case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: + return (uint64_t)boundImageView_[1]; + case NativeObject::RENDER_MANAGER: + return (uint64_t)(uintptr_t)&renderManager_; + case NativeObject::NULL_IMAGEVIEW: + return (uint64_t)GetNullTexture()->GetImageView(); + case NativeObject::TEXTURE_VIEW: + return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); + case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW: + return (uint64_t)curFramebuffer_->GetFB()->color.imageView; + default: + Crash(); + return 0; + } +} + } // namespace Draw diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 6196f494f93e..e00c0502f3a5 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -242,6 +242,7 @@ enum class NativeObject { INIT_COMMANDBUFFER, BOUND_TEXTURE0_IMAGEVIEW, BOUND_TEXTURE1_IMAGEVIEW, + BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW, RENDER_MANAGER, TEXTURE_VIEW, NULL_IMAGEVIEW, @@ -650,6 +651,9 @@ class DrawContext { // binding must 
be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2). virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; + // Framebuffer fetch / input attachment support, needs to be explicit in Vulkan. + virtual void BindCurrentFramebufferForColorInput() {} + // deprecated, only used by D3D9 virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) { return 0; diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index d361458111f9..06bf6513ff11 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -484,12 +484,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH)); } -void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) { +void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) { if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - *fboTexNeedsBind = false; + *fboTexState = FBO_TEX_READ_FRAMEBUFFER; } else { gpuStats.numCopiesForShaderBlend++; - *fboTexNeedsBind = true; + *fboTexState = FBO_TEX_COPY_BIND_TEX; } gstate_c.Dirty(DIRTY_SHADERBLEND); diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index a8997454d9a0..e6ba0b37d9d9 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -46,6 +46,12 @@ enum { TEX_SLOT_SPLINE_WEIGHTS_V = 6, }; +enum FBOTexState { + FBO_TEX_NONE, + FBO_TEX_COPY_BIND_TEX, + FBO_TEX_READ_FRAMEBUFFER, +}; + inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) { // As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it // into the top of the verttype where there are unused bits. 
@@ -130,7 +136,7 @@ class DrawEngineCommon { // Vertex decoding void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts); - void ApplyFramebufferRead(bool *fboTexNeedsBind); + void ApplyFramebufferRead(FBOTexState *fboTexState); inline int IndexSize(u32 vtype) const { const u32 indexType = (vtype & GE_VTYPE_IDX_MASK); diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index db1d886b1ffa..675d9ebd80e6 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -128,10 +128,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4); bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps; - bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); + bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + + bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); if (shaderDepal && !doTexture) { @@ -151,6 +153,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (readFramebufferTex) { WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); + } else if (fetchFramebuffer) { + WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n"); + 
if (fragmentShaderFlags) { + *fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT; + } } if (shaderDepal) { @@ -407,7 +414,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (!strcmp(compat.fragColor0, "fragColor0")) { const char *qualifierColor0 = "out"; - if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { + if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { qualifierColor0 = "inout"; } // Output the output color definitions. @@ -483,20 +490,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } // Two things read from the old framebuffer - shader replacement blending and bit-level masking. - if (readFramebuffer) { + if (readFramebufferTex) { if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n"); } else if (compat.shaderLanguage == HLSL_D3D9) { WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); - } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. - // We can just read the prev value more directly. - WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); } else if (!compat.texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); } else { WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); } + } else if (fetchFramebuffer) { + // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. 
+ if (compat.shaderLanguage == GLSL_3xx) { + WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); + } else if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n"); + } else { + _assert_msg_(false, "Need fetch destColor, but not a compatible language"); + } } if (isModeClear) { diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 85c651cf8bf6..8f358fa7ff8e 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -42,7 +42,7 @@ struct FShaderID; // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { - FS_FLAG_INPUT_ATTACHMENT = 1, + INPUT_ATTACHMENT = 1, }; ENUM_CLASS_BITOPS(FragmentShaderFlags); diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 0cee52a15586..e594042abd1c 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -153,20 +153,23 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only. if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState_ = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // No sampler required, we do a plain Load in the pixel shader.
fboTexBound_ = true; + fboTexBindState_ = FBO_TEX_NONE; framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState"); // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + fboTexBindState_ = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h index a0ee23e60d1e..9ef5b37c650a 100644 --- a/GPU/Directx9/DrawEngineDX9.h +++ b/GPU/Directx9/DrawEngineDX9.h @@ -170,6 +170,8 @@ class DrawEngineDX9 : public DrawEngineCommon { // Hardware tessellation TessellationDataTransferDX9 *tessDataTransferDX9; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; + int lastRenderStepId_ = -1; bool fboTexNeedsBind_ = false; diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 2ebadb397416..0dfa352f0007 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (!gstate.isModeClear()) { textureCache_->ApplyTexture(); - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; } // TODO: Test texture? @@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; + fboTexBindState_ = FBO_TEX_NONE; dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + // Not supported. + fboTexBindState_ = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index dbfc115a9184..cdcc9e5069a9 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); // We copy the framebuffer here, as doing so will wipe any blend state if we do it later. - if (fboTexNeedsBind) { + // fboTexBindState won't be FBO_TEX_COPY_BIND_TEX if we can read directly from the target.
+ if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. @@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) { + // No action needed here. + fboTexBindState = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index f31d31e17614..91f45e37c7cf 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -485,7 +485,8 @@ enum { // Free bit: 15 GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - // Free bits: 18-19 + GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18), + // Free bits: 19 GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22), diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index e8ebd485d105..c67eb59ea259 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -71,6 +71,7 @@ enum { DRAW_BINDING_TESS_STORAGE_BUF = 6, DRAW_BINDING_TESS_STORAGE_BUF_WU = 7, DRAW_BINDING_TESS_STORAGE_BUF_WV = 8, + DRAW_BINDING_INPUT_ATTACHMENT = 9, }; enum { @@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw) void DrawEngineVulkan::InitDeviceObjects() { // All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated. 
- VkDescriptorSetLayoutBinding bindings[9]{}; + + // TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess. + // Note that it becomes a support matrix.. + VkDescriptorSetLayoutBinding bindings[10]{}; bindings[0].descriptorCount = 1; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; @@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV; + bindings[9].descriptorCount = 1; + bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT; VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); VkDevice device = vulkan->GetDevice(); @@ -417,15 +425,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView } if (boundSecondary_) { - tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tex[1].imageView = boundSecondary_; tex[1].sampler = samplerSecondaryNearest_; writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE; + writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE; writes[n].pImageInfo = &tex[1]; writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[n].descriptorType = key.secondaryIsInputAttachment ? 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[n].dstSet = desc; n++; } @@ -788,7 +796,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) { gstate_c.Dirty(DIRTY_BLEND_STATE); @@ -916,7 +924,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) { gstate_c.Dirty(DIRTY_BLEND_STATE); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 531e05c4ed51..653dfd3f1ff8 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -234,6 +234,7 @@ class DrawEngineVulkan : public DrawEngineCommon { VkSampler sampler_; VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical // for all draws in a frame, except when the buffer has to grow. + bool secondaryIsInputAttachment; }; // We alternate between these. 
@@ -281,7 +282,7 @@ class DrawEngineVulkan : public DrawEngineCommon { VulkanDynamicState dynState_{}; int tessOffset_ = 0; - bool fboTexNeedsBind_ = false; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; // Hardware tessellation TessellationDataTransferVulkan *tessDataTransferVulkan; diff --git a/GPU/Vulkan/FramebufferManagerVulkan.h b/GPU/Vulkan/FramebufferManagerVulkan.h index 0f5d7c4f532f..d3370fafb7a8 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.h +++ b/GPU/Vulkan/FramebufferManagerVulkan.h @@ -33,7 +33,7 @@ class VulkanPushBuffer; class FramebufferManagerVulkan : public FramebufferManagerCommon { public: - FramebufferManagerVulkan(Draw::DrawContext *draw); + explicit FramebufferManagerVulkan(Draw::DrawContext *draw); ~FramebufferManagerVulkan(); // If within a render pass, this will just issue a regular clear. If beginning a new render pass, diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index f96d282fc771..039f855d866f 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -229,6 +229,9 @@ void GPU_Vulkan::CheckGPUFeatures() { features |= GPU_SUPPORTS_TEXTURE_FLOAT; features |= GPU_SUPPORTS_DEPTH_TEXTURE; + // input attachments + features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; + auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled; if (enabledFeatures.depthClamp) { features |= GPU_SUPPORTS_DEPTH_CLAMP; diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index a88fe235a546..e950dfc45740 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) { } static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache, - VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key, + VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key, const DecVtxFormat *decFmt, VulkanVertexShader *vs, 
VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) { VulkanPipeline *vulkanPipeline = new VulkanPipeline(); VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc; @@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game"); vulkanPipeline->pipeline = pipeline; - vulkanPipeline->flags = 0; if (useBlendConstant) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT; + pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT; if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES; + pipelineFlags |= PipelineFlags::USES_LINES; if (dss.depthTestEnable || dss.stencilTestEnable) { - vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } + vulkanPipeline->pipelineFlags = pipelineFlags; return vulkanPipeline; } @@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * if (iter) return iter; + PipelineFlags pipelineFlags = (PipelineFlags)0; + if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) { + pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT; + } + VulkanPipeline *pipeline = CreateVulkanPipeline( - renderManager, pipelineCache_, layout, + renderManager, pipelineCache_, layout, pipelineFlags, rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask); pipelines_.Insert(key, pipeline); diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index af32aa81bd59..08907e3b3ee8 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -55,11 +55,12 @@ struct VulkanPipelineKey { struct VulkanPipeline { VKRGraphicsPipeline *pipeline; VKRGraphicsPipelineDesc desc; - int flags; // PipelineFlags enum above. 
+ PipelineFlags pipelineFlags; // PipelineFlags enum above. - bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; } - bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; } - bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; } + bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; } + bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; } + bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; } + bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; } u32 GetVariantsBitmask() const; }; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 6457b99ad966..7377b059a0bc 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - ApplyFramebufferRead(&fboTexNeedsBind_); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. // We might still end up using blend to write something to alpha. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -364,15 +364,19 @@ void DrawEngineVulkan::BindShaderBlendTex() { // TODO: At this point, we know if the vertices are full alpha or not. // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? 
if (!gstate.isModeClear()) { - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); _dbg_assert_(bindResult); boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + draw_->BindCurrentFramebufferForColorInput(); + boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW); + fboTexBindState_ = FBO_TEX_NONE; } } }