diff --git a/Common/GPU/Vulkan/VulkanBarrier.cpp b/Common/GPU/Vulkan/VulkanBarrier.cpp index 125d51ea6de9..e4f2d0908933 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.cpp +++ b/Common/GPU/Vulkan/VulkanBarrier.cpp @@ -4,7 +4,7 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) { if (!imageBarriers_.empty()) { - vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); + vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); } imageBarriers_.clear(); srcStageMask_ = 0; diff --git a/Common/GPU/Vulkan/VulkanBarrier.h b/Common/GPU/Vulkan/VulkanBarrier.h index eb949dd2f047..0d5754b3f4f4 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.h +++ b/Common/GPU/Vulkan/VulkanBarrier.h @@ -21,6 +21,7 @@ class VulkanBarrier { ) { srcStageMask_ |= srcStageMask; dstStageMask_ |= dstStageMask; + dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT; VkImageMemoryBarrier imageBarrier; imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -112,4 +113,5 @@ class VulkanBarrier { VkPipelineStageFlags srcStageMask_ = 0; VkPipelineStageFlags dstStageMask_ = 0; std::vector imageBarriers_; + VkDependencyFlags dependencyFlags_ = 0; }; diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index 55edc0f31f86..0d29b518df9f 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() { extensionsLookup_.KHR_create_renderpass2 = true; extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME); } + extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME); + 
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME); VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO }; device_info.queueCreateInfoCount = 1; diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp index d4052e970a0e..b52e4396cb61 100644 --- a/Common/GPU/Vulkan/VulkanDebug.cpp +++ b/Common/GPU/Vulkan/VulkanDebug.cpp @@ -86,7 +86,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( } else { WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str()); } - // false indicates that layer should not bail-out of an // API call that had validation failures. This may mean that the // app dies inside the driver due to invalid parameter(s). @@ -94,3 +93,4 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( // keep that behavior here. return false; } + diff --git a/Common/GPU/Vulkan/VulkanLoader.h b/Common/GPU/Vulkan/VulkanLoader.h index 1f3d7d7704c2..b0c14570da26 100644 --- a/Common/GPU/Vulkan/VulkanLoader.h +++ b/Common/GPU/Vulkan/VulkanLoader.h @@ -241,6 +241,8 @@ struct VulkanExtensions { bool KHR_depth_stencil_resolve; bool EXT_shader_stencil_export; bool EXT_swapchain_colorspace; + bool ARM_rasterization_order_attachment_access; + bool EXT_fragment_shader_interlock; // bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers. }; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 10a88e623a80..2a7f81690f7e 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) { if (a == b) { // Trivial merging case. 
return a; + } else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) { + return RP_TYPE_COLOR_DEPTH_INPUT; + } else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) { + return RP_TYPE_COLOR_DEPTH_INPUT; } - // More cases to be added later. return a; } @@ -155,7 +158,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) { return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning } +// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827 +// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies + VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) { + bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT; + VkAttachmentDescription attachments[2] = {}; attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; @@ -179,7 +187,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp VkAttachmentReference color_reference{}; color_reference.attachment = 0; - color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_reference.layout = selfDependency ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference depth_reference{}; depth_reference.attachment = 1; @@ -188,8 +196,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.flags = 0; - subpass.inputAttachmentCount = 0; - subpass.pInputAttachments = nullptr; + if (selfDependency) { + subpass.inputAttachmentCount = 1; + subpass.pInputAttachments = &color_reference; + } else { + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + } subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &color_reference; subpass.pResolveAttachments = nullptr; @@ -198,22 +211,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp subpass.pPreserveAttachments = nullptr; // Not sure if this is really necessary. - VkSubpassDependency dep{}; - dep.srcSubpass = VK_SUBPASS_EXTERNAL; - dep.dstSubpass = 0; - dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.srcAccessMask = 0; - dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkSubpassDependency deps[2]{}; + size_t numDeps = 0; VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO }; rp.attachmentCount = 2; rp.pAttachments = attachments; rp.subpassCount = 1; rp.pSubpasses = &subpass; + if (rpType == RP_TYPE_BACKBUFFER) { + deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; + deps[numDeps].dstSubpass = 0; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].srcAccessMask = 0; + deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + numDeps++; rp.dependencyCount = 1; - rp.pDependencies = &dep; + } + 
+ if (selfDependency) { + deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + deps[numDeps].srcSubpass = 0; + deps[numDeps].dstSubpass = 0; + numDeps++; + } + + if (numDeps > 0) { + rp.dependencyCount = (u32)numDeps; + rp.pDependencies = deps; } VkRenderPass pass; @@ -246,6 +277,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) { return pass; } +// Must match the subpass self-dependency declared above. +void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) { + if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) { + VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + recordBarrier->TransitionImage( + img.image, + 0, + 1, + aspect, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + srcAccessMask, + dstAccessMask, + srcStageMask, + dstStageMask + ); + } else { + _assert_msg_(false, "Depth self-dependencies not yet supported"); + } +} + void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { // Optimizes renderpasses, then sequences them. 
// Planned optimizations: @@ -628,6 +683,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const { switch (step.render.renderPassType) { case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break; case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break; + case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break; default: renderCmd = "N/A"; } snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer); @@ -817,6 +873,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) { case VKRRenderCommand::REMOVED: INFO_LOG(G3D, " (Removed)"); break; + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + INFO_LOG(G3D, " SelfBarrier()"); + break; case VKRRenderCommand::BIND_GRAPHICS_PIPELINE: INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline); break; @@ -1235,6 +1294,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c break; } + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + { + _assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT); + VulkanBarrier barrier; + SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier); + barrier.Flush(cmd); + break; + } + case VKRRenderCommand::PUSH_CONSTANTS: vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data); break; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index adb9e7d96e99..8f0533331a40 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -20,7 +20,6 @@ struct VKRImage; enum { QUEUE_HACK_MGS2_ACID = 1, QUEUE_HACK_SONIC = 2, - // Killzone PR = 4. 
QUEUE_HACK_RENDERPASS_MERGE = 8, }; @@ -36,20 +35,24 @@ enum class VKRRenderCommand : uint8_t { DRAW, DRAW_INDEXED, PUSH_CONSTANTS, + SELF_DEPENDENCY_BARRIER, NUM_RENDER_COMMANDS, }; -enum PipelineFlags { - PIPELINE_FLAG_NONE = 0, - PIPELINE_FLAG_USES_LINES = (1 << 2), - PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3), - PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. +enum class PipelineFlags { + NONE = 0, + USES_LINES = (1 << 2), + USES_BLEND_CONSTANT = (1 << 3), + USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. + USES_INPUT_ATTACHMENT = (1 << 5), }; +ENUM_CLASS_BITOPS(PipelineFlags); // Pipelines need to be created for the right type of render pass. enum RenderPassType { RP_TYPE_BACKBUFFER, RP_TYPE_COLOR_DEPTH, + RP_TYPE_COLOR_DEPTH_INPUT, // Later will add pure-color render passes. RP_TYPE_COUNT, }; @@ -168,7 +171,6 @@ struct VKRStep { union { struct { VKRFramebuffer *framebuffer; - // TODO: Look these up through renderPass? VKRRenderPassLoadAction colorLoad; VKRRenderPassLoadAction depthLoad; VKRRenderPassLoadAction stencilLoad; @@ -183,7 +185,7 @@ struct VKRStep { int numReads; VkImageLayout finalColorLayout; VkImageLayout finalDepthStencilLayout; - u32 pipelineFlags; + PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT VkRect2D renderArea; // Render pass type. Deduced after finishing recording the pass, from the used pipelines. // NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization. 
@@ -324,6 +326,8 @@ class VulkanQueueRunner { static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + VulkanContext *vulkan_; VkFramebuffer backbuffer_ = VK_NULL_HANDLE; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 56f40655f7c8..8dcd0985fa9f 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int // Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers. ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (color) { - ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; } else { ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } @@ -534,7 +534,9 @@ void VulkanRenderManager::CompileThreadFunc() { break; } - INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size()); + if (!toCompile.empty()) { + INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size()); + } // TODO: Here we can sort the pending pipelines by vertex and fragment shaders, // and split up further. @@ -774,6 +776,9 @@ void VulkanRenderManager::EndCurRenderStep() { curRenderStep_->render.pipelineFlags = curPipelineFlags_; if (!curRenderStep_->render.framebuffer) { rpType = RP_TYPE_BACKBUFFER; + } else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) { + // Not allowed on backbuffers. 
+ rpType = RP_TYPE_COLOR_DEPTH_INPUT; } VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key); @@ -806,7 +811,12 @@ void VulkanRenderManager::EndCurRenderStep() { // We no longer have a current render step. curRenderStep_ = nullptr; - curPipelineFlags_ = 0; + curPipelineFlags_ = (PipelineFlags)0; +} + +void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) { + _dbg_assert_(curRenderStep_); + curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER }); } void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 4cc7aafdfccc..b29b8bdcbd6f 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -236,6 +236,8 @@ class VulkanRenderManager { // as the other backends, even though there's no actual binding happening here. 
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment); + void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits); + bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); @@ -544,7 +546,7 @@ class VulkanRenderManager { VKRStep *curRenderStep_ = nullptr; bool curStepHasViewport_ = false; bool curStepHasScissor_ = false; - u32 curPipelineFlags_ = 0; + PipelineFlags curPipelineFlags_{}; BoundingRect curRenderArea_; std::vector steps_; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 91a6f5ff3e97..7d07e6a3cb4c 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -401,9 +401,10 @@ class VKContext : public DrawContext { // These functions should be self explanatory. 
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; Framebuffer *GetCurrentRenderTarget() override { - return curFramebuffer_; + return (Framebuffer *)curFramebuffer_.ptr; } void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; + void BindCurrentFramebufferForColorInput() override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -473,27 +474,7 @@ class VKContext : public DrawContext { std::vector GetFeatureList() const override; std::vector GetExtensionList() const override; - uint64_t GetNativeObject(NativeObject obj, void *srcObject) override { - switch (obj) { - case NativeObject::CONTEXT: - return (uint64_t)vulkan_; - case NativeObject::INIT_COMMANDBUFFER: - return (uint64_t)renderManager_.GetInitCmd(); - case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: - return (uint64_t)boundImageView_[0]; - case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: - return (uint64_t)boundImageView_[1]; - case NativeObject::RENDER_MANAGER: - return (uint64_t)(uintptr_t)&renderManager_; - case NativeObject::NULL_IMAGEVIEW: - return (uint64_t)GetNullTexture()->GetImageView(); - case NativeObject::TEXTURE_VIEW: - return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); - default: - Crash(); - return 0; - } - } + uint64_t GetNativeObject(NativeObject obj, void *srcObject) override; void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; @@ -522,7 +503,7 @@ class VKContext : public DrawContext { VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE; VkPipelineCache pipelineCache_ = VK_NULL_HANDLE; - AutoRef curFramebuffer_; + AutoRef curFramebuffer_; VkDevice device_; VkQueue queue_; @@ -831,6 +812,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) // Color write mask not masking write in certain scenarios with a depth test, see #10421. 
// Known still present on driver 0x80180000 and Adreno 5xx (possibly more.) bugs_.Infest(Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST); + + // Trying to follow all the rules in https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies + // and https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#renderpass-feedbackloop, but still it doesn't + // quite work - artifacts on triangle boundaries on Adreno. + bugs_.Infest(Bugs::SUBPASS_FEEDBACK_BROKEN); } else if (caps_.vendor == GPUVendor::VENDOR_AMD) { // See issue #10074, and also #10065 (AMD) and #10109 for the choice of the driver version to check for. if (deviceProps.driverVersion < 0x00407000) { @@ -857,6 +843,10 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) } } + // Limited, through input attachments and self-dependencies. + // We turn it off here already if buggy. + caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN); + caps_.deviceID = deviceProps.deviceID; device_ = vulkan->GetDevice(); @@ -1062,12 +1052,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil; VKRasterState *raster = (VKRasterState *)desc.raster; - u32 pipelineFlags = 0; + PipelineFlags pipelineFlags = (PipelineFlags)0; if (depth->info.depthTestEnable || depth->info.stencilTestEnable) { - pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } - VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag); + VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? 
desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag); VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc; @@ -1588,6 +1578,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment); } +void VKContext::BindCurrentFramebufferForColorInput() { + renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT); +} + void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { VKFramebuffer *fb = (VKFramebuffer *)fbo; if (fb) { @@ -1628,4 +1622,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe } } +uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) { + switch (obj) { + case NativeObject::CONTEXT: + return (uint64_t)vulkan_; + case NativeObject::INIT_COMMANDBUFFER: + return (uint64_t)renderManager_.GetInitCmd(); + case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: + return (uint64_t)boundImageView_[0]; + case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: + return (uint64_t)boundImageView_[1]; + case NativeObject::RENDER_MANAGER: + return (uint64_t)(uintptr_t)&renderManager_; + case NativeObject::NULL_IMAGEVIEW: + return (uint64_t)GetNullTexture()->GetImageView(); + case NativeObject::TEXTURE_VIEW: + return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); + case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW: + return (uint64_t)curFramebuffer_->GetFB()->color.imageView; + default: + Crash(); + return 0; + } +} + } // namespace Draw diff --git a/Common/GPU/thin3d.cpp b/Common/GPU/thin3d.cpp index 1cc3c11fa0e9..2560765f43ce 100644 --- a/Common/GPU/thin3d.cpp +++ b/Common/GPU/thin3d.cpp @@ -681,6 +681,7 @@ const char *Bugs::GetBugName(uint32_t bug) { case MALI_STENCIL_DISCARD_BUG: return "MALI_STENCIL_DISCARD_BUG"; case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG"; case MALI_CONSTANT_LOAD_BUG: return 
"MALI_CONSTANT_LOAD_BUG"; + case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN"; default: return "(N/A)"; } } diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 843e6e8813a8..389640ec9eb3 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -242,6 +242,7 @@ enum class NativeObject { INIT_COMMANDBUFFER, BOUND_TEXTURE0_IMAGEVIEW, BOUND_TEXTURE1_IMAGEVIEW, + BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW, RENDER_MANAGER, TEXTURE_VIEW, NULL_IMAGEVIEW, @@ -331,6 +332,7 @@ class Bugs { MALI_STENCIL_DISCARD_BUG = 8, RASPBERRY_SHADER_COMP_HANG = 9, MALI_CONSTANT_LOAD_BUG = 10, + SUBPASS_FEEDBACK_BROKEN = 11, MAX_BUG, }; @@ -651,6 +653,9 @@ class DrawContext { // binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2). virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; + // Framebuffer fetch / input attachment support, needs to be explicit in Vulkan. + virtual void BindCurrentFramebufferForColorInput() {} + // deprecated, only used by D3D9 virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) { return 0; diff --git a/Common/UI/UIScreen.h b/Common/UI/UIScreen.h index d667873e8cc1..4b113d3001e0 100644 --- a/Common/UI/UIScreen.h +++ b/Common/UI/UIScreen.h @@ -187,7 +187,7 @@ class SliderPopupScreen : public PopupScreen { disabled_ = *value_ < 0; } - const char *tag() const { return "SliderPopup"; } + const char *tag() const override { return "SliderPopup"; } Event OnChange; @@ -216,7 +216,7 @@ class SliderFloatPopupScreen : public PopupScreen { : PopupScreen(title, "OK", "Cancel"), units_(units), value_(value), originalValue_(*value), minValue_(minValue), maxValue_(maxValue), step_(step), changing_(false), liveUpdate_(liveUpdate) {} void CreatePopupContents(UI::ViewGroup *parent) override; - const char *tag() const { return "SliderFloatPopup"; } + const char *tag() const override { return "SliderFloatPopup"; } Event OnChange; @@ -245,7 +245,7 @@ class 
TextEditPopupScreen : public PopupScreen { : PopupScreen(title, "OK", "Cancel"), value_(value), placeholder_(placeholder), maxLen_(maxLen) {} virtual void CreatePopupContents(ViewGroup *parent) override; - const char *tag() const { return "TextEditPopup"; } + const char *tag() const override { return "TextEditPopup"; } Event OnChange; diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index d361458111f9..06bf6513ff11 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -484,12 +484,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH)); } -void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) { +void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) { if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - *fboTexNeedsBind = false; + *fboTexState = FBO_TEX_READ_FRAMEBUFFER; } else { gpuStats.numCopiesForShaderBlend++; - *fboTexNeedsBind = true; + *fboTexState = FBO_TEX_COPY_BIND_TEX; } gstate_c.Dirty(DIRTY_SHADERBLEND); diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index a8997454d9a0..e6ba0b37d9d9 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -46,6 +46,12 @@ enum { TEX_SLOT_SPLINE_WEIGHTS_V = 6, }; +enum FBOTexState { + FBO_TEX_NONE, + FBO_TEX_COPY_BIND_TEX, + FBO_TEX_READ_FRAMEBUFFER, +}; + inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) { // As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it // into the top of the verttype where there are unused bits. 
@@ -130,7 +136,7 @@ class DrawEngineCommon { // Vertex decoding void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts); - void ApplyFramebufferRead(bool *fboTexNeedsBind); + void ApplyFramebufferRead(FBOTexState *fboTexState); inline int IndexSize(u32 vtype) const { const u32 indexType = (vtype & GE_VTYPE_IDX_MASK); diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index d37b4a772d5d..e2088731af95 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -134,10 +134,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4); bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps; - bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); + bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + + bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) { @@ -157,6 +159,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (readFramebufferTex) { WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); + } else if (fetchFramebuffer) { + WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform 
subpassInput inputColor;\n"); + if (fragmentShaderFlags) { + *fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT; + } } if (shaderDepalMode != ShaderDepalMode::OFF) { @@ -416,7 +423,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (!strcmp(compat.fragColor0, "fragColor0")) { const char *qualifierColor0 = "out"; - if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { + if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { qualifierColor0 = "inout"; } // Output the output color definitions. @@ -492,20 +499,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } // Two things read from the old framebuffer - shader replacement blending and bit-level masking. - if (readFramebuffer) { + if (readFramebufferTex) { if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n"); } else if (compat.shaderLanguage == HLSL_D3D9) { WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); - } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. - // We can just read the prev value more directly. - WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); } else if (!compat.texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); } else { WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); } + } else if (fetchFramebuffer) { + // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. 
+ if (compat.shaderLanguage == GLSL_3xx) { + WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); + } else if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n"); + } else { + _assert_msg_(false, "Need fetch destColor, but not a compatible language"); + } } if (isModeClear) { diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 85c651cf8bf6..8f358fa7ff8e 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -42,7 +42,7 @@ struct FShaderID; // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { - FS_FLAG_INPUT_ATTACHMENT = 1, + INPUT_ATTACHMENT = 1, }; ENUM_CLASS_BITOPS(FragmentShaderFlags); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index ff31455fae59..cba174d41671 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -292,9 +292,13 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) { SamplerCacheKey key = GetSamplingParams(0, nullptr); - // Kill any mipmapping settings, and reset min filtering. - int minFilt = gstate.texfilter & 0x7; - key.minFilt = minFilt & 1; + // In case auto max quality was on, restore min filt. Another fix for water in Outrun. + if (g_Config.iTexFiltering == TEX_FILTER_AUTO_MAX_QUALITY) { + int minFilt = gstate.texfilter & 0x7; + key.minFilt = minFilt & 1; + } + + // Kill any mipmapping settings. 
key.mipEnable = false; key.mipFilt = false; key.aniso = 0.0; diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 0cee52a15586..f4ce888f7bd8 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -153,15 +153,16 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only. if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // No sampler required, we do a plain Load in the pixel shader. fboTexBound_ = true; + fboTexBindState = FBO_TEX_NONE; framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState"); // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. 
diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h index a0ee23e60d1e..9ef5b37c650a 100644 --- a/GPU/Directx9/DrawEngineDX9.h +++ b/GPU/Directx9/DrawEngineDX9.h @@ -170,6 +170,8 @@ class DrawEngineDX9 : public DrawEngineCommon { // Hardware tessellation TessellationDataTransferDX9 *tessDataTransferDX9; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; + int lastRenderStepId_ = -1; bool fboTexNeedsBind_ = false; diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 2ebadb397416..0dfa352f0007 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (!gstate.isModeClear()) { textureCache_->ApplyTexture(); - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; } // TODO: Test texture? @@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only. if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. 
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; + fboTexBindState_ = FBO_TEX_NONE; dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + // Not supported. + fboTexBindState_ = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index dbfc115a9184..cdcc9e5069a9 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); // We copy the framebuffer here, as doing so will wipe any blend state if we do it later. - if (fboTexNeedsBind) { + // fboTexBindState won't be FBO_TEX_COPY_BIND_TEX if we can read directly from the target. + if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. @@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // Must dirty blend state here so we re-copy next time. 
Example: Lunar's spell effects. dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) { + // No action needed here. + fboTexBindState = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index cbfb58395d93..191a67037608 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -485,7 +485,8 @@ enum { // Free bit: 15 GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - // Free bits: 18-19 + GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18), + // Free bits: 19 GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22), diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index e8ebd485d105..f992df082fa5 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -71,6 +71,7 @@ enum { DRAW_BINDING_TESS_STORAGE_BUF = 6, DRAW_BINDING_TESS_STORAGE_BUF_WU = 7, DRAW_BINDING_TESS_STORAGE_BUF_WV = 8, + DRAW_BINDING_INPUT_ATTACHMENT = 9, }; enum { @@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw) void DrawEngineVulkan::InitDeviceObjects() { // All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated. - VkDescriptorSetLayoutBinding bindings[9]{}; + + // TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess. + // Note that it becomes a support matrix.. 
+ VkDescriptorSetLayoutBinding bindings[10]{}; bindings[0].descriptorCount = 1; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; @@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV; + bindings[9].descriptorCount = 1; + bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT; VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); VkDevice device = vulkan->GetDevice(); @@ -145,13 +153,15 @@ void DrawEngineVulkan::InitDeviceObjects() { static constexpr int DEFAULT_DESC_POOL_SIZE = 512; std::vector dpTypes; - dpTypes.resize(3); + dpTypes.resize(4); dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set. dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; dpTypes[2].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these. dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + dpTypes[3].descriptorCount = DEFAULT_DESC_POOL_SIZE; // The layout has a single input attachment binding per set. TODO: Use a separate layout when no input attachment is needed to reduce the need for these. + dpTypes[3].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; // Don't want to mess around with individually freeing these. 
@@ -379,6 +389,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView key.base_ = base; key.light_ = light; key.bone_ = bone; + key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_; FrameData &frame = GetCurFrame(); // See if we already have this descriptor set cached. @@ -417,15 +428,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView } if (boundSecondary_) { - tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tex[1].imageView = boundSecondary_; tex[1].sampler = samplerSecondaryNearest_; writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE; + writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE; writes[n].pImageInfo = &tex[1]; writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[n].descriptorType = key.secondaryIsInputAttachment ? 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[n].dstSet = desc; n++; } @@ -788,7 +799,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) { gstate_c.Dirty(DIRTY_BLEND_STATE); @@ -916,7 +927,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) { gstate_c.Dirty(DIRTY_BLEND_STATE); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 531e05c4ed51..0de67940dec9 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -217,6 +217,8 @@ class DrawEngineVulkan : public DrawEngineCommon { // Secondary texture for shader blending VkImageView boundSecondary_ = VK_NULL_HANDLE; + bool boundSecondaryIsInputAttachment_ = false; + // CLUT texture for shader depal VkImageView boundDepal_ = VK_NULL_HANDLE; bool boundDepalSmoothed_ = false; @@ -234,6 +236,7 @@ class DrawEngineVulkan : public DrawEngineCommon { VkSampler sampler_; VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical // for all draws in a frame, except when the buffer has to grow. + bool secondaryIsInputAttachment; }; // We alternate between these. 
@@ -281,7 +284,7 @@ class DrawEngineVulkan : public DrawEngineCommon { VulkanDynamicState dynState_{}; int tessOffset_ = 0; - bool fboTexNeedsBind_ = false; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; // Hardware tessellation TessellationDataTransferVulkan *tessDataTransferVulkan; diff --git a/GPU/Vulkan/FramebufferManagerVulkan.h b/GPU/Vulkan/FramebufferManagerVulkan.h index 0f5d7c4f532f..d3370fafb7a8 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.h +++ b/GPU/Vulkan/FramebufferManagerVulkan.h @@ -33,7 +33,7 @@ class VulkanPushBuffer; class FramebufferManagerVulkan : public FramebufferManagerCommon { public: - FramebufferManagerVulkan(Draw::DrawContext *draw); + explicit FramebufferManagerVulkan(Draw::DrawContext *draw); ~FramebufferManagerVulkan(); // If within a render pass, this will just issue a regular clear. If beginning a new render pass, diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index f96d282fc771..eed7c8b9d5a0 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -229,6 +229,11 @@ void GPU_Vulkan::CheckGPUFeatures() { features |= GPU_SUPPORTS_TEXTURE_FLOAT; features |= GPU_SUPPORTS_DEPTH_TEXTURE; + // through input attachments, when not broken. 
+ if (draw_->GetDeviceCaps().framebufferFetchSupported) { + features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; + } + auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled; if (enabledFeatures.depthClamp) { features |= GPU_SUPPORTS_DEPTH_CLAMP; diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index a88fe235a546..e950dfc45740 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) { } static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache, - VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key, + VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) { VulkanPipeline *vulkanPipeline = new VulkanPipeline(); VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc; @@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game"); vulkanPipeline->pipeline = pipeline; - vulkanPipeline->flags = 0; if (useBlendConstant) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT; + pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT; if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES; + pipelineFlags |= PipelineFlags::USES_LINES; if (dss.depthTestEnable || dss.stencilTestEnable) { - vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } + vulkanPipeline->pipelineFlags = pipelineFlags; return vulkanPipeline; } @@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * 
if (iter) return iter; + PipelineFlags pipelineFlags = (PipelineFlags)0; + if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) { + pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT; + } + VulkanPipeline *pipeline = CreateVulkanPipeline( - renderManager, pipelineCache_, layout, + renderManager, pipelineCache_, layout, pipelineFlags, rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask); pipelines_.Insert(key, pipeline); diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index af32aa81bd59..08907e3b3ee8 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -55,11 +55,12 @@ struct VulkanPipelineKey { struct VulkanPipeline { VKRGraphicsPipeline *pipeline; VKRGraphicsPipelineDesc desc; - int flags; // PipelineFlags enum above. + PipelineFlags pipelineFlags; // PipelineFlags enum above. - bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; } - bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; } - bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; } + bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; } + bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; } + bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; } + bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; } u32 GetVariantsBitmask() const; }; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 6457b99ad966..290c7b010ce6 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - 
ApplyFramebufferRead(&fboTexNeedsBind_); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. // We might still end up using blend to write something to alpha. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -364,15 +364,23 @@ void DrawEngineVulkan::BindShaderBlendTex() { // TODO: At this point, we know if the vertices are full alpha or not. // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); _dbg_assert_(bindResult); boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); + boundSecondaryIsInputAttachment_ = false; fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. 
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + draw_->BindCurrentFramebufferForColorInput(); + boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW); + boundSecondaryIsInputAttachment_ = true; + fboTexBindState_ = FBO_TEX_NONE; + } else { + boundSecondary_ = VK_NULL_HANDLE; } } } diff --git a/UI/InstallZipScreen.h b/UI/InstallZipScreen.h index a7073ca73f15..2f705b5510b6 100644 --- a/UI/InstallZipScreen.h +++ b/UI/InstallZipScreen.h @@ -30,7 +30,7 @@ class InstallZipScreen : public UIDialogScreenWithBackground { virtual void update() override; virtual bool key(const KeyInput &key) override; - const char *tag() const { return "install_zip"; } + const char *tag() const override { return "install_zip"; } protected: virtual void CreateViews() override;