Skip to content

Commit

Permalink
Use subpass dependencies to implement shader framebuffer read in Vulkan.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Sep 2, 2022
1 parent 55beefe commit fd16769
Show file tree
Hide file tree
Showing 21 changed files with 202 additions and 73 deletions.
1 change: 0 additions & 1 deletion Common/GPU/ShaderWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ const char * const vulkan_glsl_preamble_fs =
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_ARB_shading_language_420pack : enable\n"
"#extension GL_ARB_conservative_depth : enable\n"
"#extension GL_ARB_shader_image_load_store : enable\n"
"#define splat3(x) vec3(x)\n"
"#define DISCARD discard\n"
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanBarrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

void VulkanBarrier::Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
}
imageBarriers_.clear();
srcStageMask_ = 0;
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanBarrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class VulkanBarrier {
) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;

VkImageMemoryBarrier imageBarrier;
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
Expand Down Expand Up @@ -112,4 +113,5 @@ class VulkanBarrier {
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
std::vector<VkImageMemoryBarrier> imageBarriers_;
VkDependencyFlags dependencyFlags_ = 0;
};
67 changes: 61 additions & 6 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
}

void VulkanQueueRunner::InitBackbufferRenderPass() {
VkAttachmentDescription attachments[2];
VkAttachmentDescription attachments[2]{};
attachments[0].format = vulkan_->GetSwapchainFormat();
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
Expand All @@ -138,7 +138,6 @@ void VulkanQueueRunner::InitBackbufferRenderPass() {
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // We don't want to preserve the backbuffer between frames so we really don't care.
attachments[0].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; // We only render once to the backbuffer per frame so we can do this here.
attachments[0].flags = 0;

attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat; // must use this same format later for the back depth buffer.
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
Expand All @@ -148,7 +147,6 @@ void VulkanQueueRunner::InitBackbufferRenderPass() {
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].flags = 0;

VkAttachmentReference color_reference{};
color_reference.attachment = 0;
Expand All @@ -172,6 +170,7 @@ void VulkanQueueRunner::InitBackbufferRenderPass() {

// For the built-in layout transitions.
VkSubpassDependency dep{};
dep.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dep.srcSubpass = VK_SUBPASS_EXTERNAL;
dep.dstSubpass = 0;
dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
Expand Down Expand Up @@ -208,12 +207,16 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
auto pass = renderPasses_.Get(key);
if (pass) {
return pass;
}

bool selfDependency = true || key.selfDependencyColor;

VkAttachmentDescription attachments[2] = {};
attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
Expand All @@ -237,7 +240,7 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {

VkAttachmentReference color_reference{};
color_reference.attachment = 0;
color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

VkAttachmentReference depth_reference{};
depth_reference.attachment = 1;
Expand All @@ -246,8 +249,13 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
if (selfDependency) {
subpass.inputAttachmentCount = 1;
subpass.pInputAttachments = &color_reference;
} else {
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
Expand All @@ -261,13 +269,51 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
rp.subpassCount = 1;
rp.pSubpasses = &subpass;

// must be declared outside the "if".
VkSubpassDependency self_dep { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
if (selfDependency) {
self_dep.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
self_dep.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
self_dep.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
self_dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
self_dep.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
self_dep.srcSubpass = 0;
self_dep.dstSubpass = 0;
rp.dependencyCount = 1;
rp.pDependencies = &self_dep;
}

VkResult res = vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &pass);
_assert_(res == VK_SUCCESS);
_assert_(pass != VK_NULL_HANDLE);
renderPasses_.Insert(key, pass);
return pass;
}

// Records the barrier that makes color attachment writes visible to input attachment
// reads of the same image within a render pass. The access and stage masks used here
// must match the subpass self-dependency set up in GetRenderPass.
// Only the color aspect is handled; any other aspect trips an assert and records nothing.
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
	const bool wantsColor = (aspect & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
	if (!wantsColor) {
		_assert_msg_(false, "Depth self-dependencies not yet supported");
		return;
	}

	// The layout is GENERAL on both sides, so this is purely an execution/memory
	// dependency (color output writes -> fragment shader input attachment reads),
	// not a layout change.
	recordBarrier->TransitionImage(
		img.image,
		0,
		1,
		aspect,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
		VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
	);
}

void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
Expand Down Expand Up @@ -1263,6 +1309,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}

case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
{
_assert_(step.render.selfDependency);
VulkanBarrier barrier;
SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
barrier.Flush(cmd);
break;
}

case VKRRenderCommand::PUSH_CONSTANTS:
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
break;
Expand Down
9 changes: 9 additions & 0 deletions Common/GPU/Vulkan/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
SELF_DEPENDENCY_BARRIER,
NUM_RENDER_COMMANDS,
};

Expand Down Expand Up @@ -166,6 +167,7 @@ struct VKRStep {
VKRRenderPassStoreAction depthStore;
VKRRenderPassStoreAction stencilStore;
u8 clearStencil;
bool selfDependency;
uint32_t clearColor;
float clearDepth;
int numDraws;
Expand Down Expand Up @@ -244,6 +246,11 @@ class VulkanQueueRunner {
VKRRenderPassStoreAction colorStoreAction;
VKRRenderPassStoreAction depthStoreAction;
VKRRenderPassStoreAction stencilStoreAction;

// Sets up a renderpass that can read from the texture being rendered to by using an input attachment.
// Can be used for limited programmable blending with no additional extensions, or unlimited programmable
// blending using VK_ARM_rasterization_order_attachment_access or VK_EXT_fragment_shader_interlock.
bool selfDependencyColor;
};

VkRenderPass GetRenderPass(const RPKey &key);
Expand Down Expand Up @@ -297,6 +304,8 @@ class VulkanQueueRunner {
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

VulkanContext *vulkan_;

VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
Expand Down
8 changes: 7 additions & 1 deletion Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (color) {
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
} else {
ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
Expand Down Expand Up @@ -642,6 +642,12 @@ void VulkanRenderManager::EndCurRenderStep() {
}
}

// Flags the current render step as using a self-dependency and appends a
// SELF_DEPENDENCY_BARRIER command to it. Requires an active render step.
// Note: aspectBits is not consulted by this function.
void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
	_dbg_assert_(curRenderStep_);
	VkRenderData barrierCmd{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER };
	curRenderStep_->render.selfDependency = true;
	curRenderStep_->commands.push_back(barrierCmd);
}

void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
_dbg_assert_(insideFrame_);
// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ class VulkanRenderManager {
// as the other backends, even though there's no actual binding happening here.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);

void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);

bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);

Expand Down
70 changes: 40 additions & 30 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,10 @@ class VKContext : public DrawContext {
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
Framebuffer *GetCurrentRenderTarget() override {
return curFramebuffer_;
return (Framebuffer *)curFramebuffer_.ptr;
}
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override;
void BindCurrentFramebufferForColorInput() override;

void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;

Expand Down Expand Up @@ -462,34 +463,7 @@ class VKContext : public DrawContext {
std::vector<std::string> GetFeatureList() const override;
std::vector<std::string> GetExtensionList() const override;

uint64_t GetNativeObject(NativeObject obj, void *srcObject) override {
switch (obj) {
case NativeObject::CONTEXT:
return (uint64_t)vulkan_;
case NativeObject::FRAMEBUFFER_RENDERPASS:
// Return a representative renderpass.
return (uint64_t)renderManager_.GetFramebufferRenderPass();
case NativeObject::BACKBUFFER_RENDERPASS:
return (uint64_t)renderManager_.GetBackbufferRenderPass();
case NativeObject::COMPATIBLE_RENDERPASS:
return (uint64_t)renderManager_.GetCompatibleRenderPass();
case NativeObject::INIT_COMMANDBUFFER:
return (uint64_t)renderManager_.GetInitCmd();
case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
return (uint64_t)boundImageView_[0];
case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
return (uint64_t)boundImageView_[1];
case NativeObject::RENDER_MANAGER:
return (uint64_t)(uintptr_t)&renderManager_;
case NativeObject::NULL_IMAGEVIEW:
return (uint64_t)GetNullTexture()->GetImageView();
case NativeObject::TEXTURE_VIEW:
return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
default:
Crash();
return 0;
}
}
uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;

void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;

Expand Down Expand Up @@ -518,7 +492,7 @@ class VKContext : public DrawContext {
VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
AutoRef<Framebuffer> curFramebuffer_;
AutoRef<VKFramebuffer> curFramebuffer_;

VkDevice device_;
VkQueue queue_;
Expand Down Expand Up @@ -795,6 +769,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
caps_.fragmentShaderInt32Supported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
caps_.framebufferFetchSupported = true; // Limited, through input attachments and self-dependencies.

auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties;
switch (deviceProps.vendorID) {
Expand Down Expand Up @@ -1581,6 +1556,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne
boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment);
}

void VKContext::BindCurrentFramebufferForColorInput() {
renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT);
}

void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
if (fb) {
Expand Down Expand Up @@ -1621,4 +1600,35 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe
}
}

// Returns a backend-internal object, selected by `obj`, packed into a uint64_t.
// `srcObject` is only read for TEXTURE_VIEW, where it is treated as a VKTexture *.
// Any NativeObject value not listed below calls Crash() and yields 0.
uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) {
	uint64_t handle = 0;
	switch (obj) {
	case NativeObject::CONTEXT:
		handle = (uint64_t)vulkan_;
		break;
	case NativeObject::FRAMEBUFFER_RENDERPASS:
		// Return a representative renderpass.
		handle = (uint64_t)renderManager_.GetFramebufferRenderPass();
		break;
	case NativeObject::BACKBUFFER_RENDERPASS:
		handle = (uint64_t)renderManager_.GetBackbufferRenderPass();
		break;
	case NativeObject::COMPATIBLE_RENDERPASS:
		handle = (uint64_t)renderManager_.GetCompatibleRenderPass();
		break;
	case NativeObject::INIT_COMMANDBUFFER:
		handle = (uint64_t)renderManager_.GetInitCmd();
		break;
	case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
		handle = (uint64_t)boundImageView_[0];
		break;
	case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
		handle = (uint64_t)boundImageView_[1];
		break;
	case NativeObject::RENDER_MANAGER:
		handle = (uint64_t)(uintptr_t)&renderManager_;
		break;
	case NativeObject::NULL_IMAGEVIEW:
		handle = (uint64_t)GetNullTexture()->GetImageView();
		break;
	case NativeObject::TEXTURE_VIEW:
		handle = (uint64_t)(((VKTexture *)srcObject)->GetImageView());
		break;
	case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW:
		// NOTE(review): curFramebuffer_ is dereferenced without a null check;
		// presumably this is only requested while a framebuffer is bound - confirm with callers.
		handle = (uint64_t)curFramebuffer_->GetFB()->color.imageView;
		break;
	default:
		Crash();
		break;
	}
	return handle;
}

} // namespace Draw
4 changes: 4 additions & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ enum class NativeObject {
INIT_COMMANDBUFFER,
BOUND_TEXTURE0_IMAGEVIEW,
BOUND_TEXTURE1_IMAGEVIEW,
BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW,
RENDER_MANAGER,
TEXTURE_VIEW,
NULL_IMAGEVIEW,
Expand Down Expand Up @@ -653,6 +654,9 @@ class DrawContext {
// binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2).
virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0;

// Framebuffer fetch / input attachment support, needs to be explicit in Vulkan.
// The default implementation does nothing; a backend that requires the explicit
// bind (the Vulkan context does) overrides this.
virtual void BindCurrentFramebufferForColorInput() {}

// deprecated, only used by D3D9
virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {
return 0;
Expand Down
6 changes: 3 additions & 3 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}

void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) {
void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
*fboTexNeedsBind = false;
*fboTexState = FBO_TEX_READ_FRAMEBUFFER;
} else {
gpuStats.numCopiesForShaderBlend++;
*fboTexNeedsBind = true;
*fboTexState = FBO_TEX_COPY_BIND_TEX;
}

gstate_c.Dirty(DIRTY_SHADERBLEND);
Expand Down
Loading

0 comments on commit fd16769

Please sign in to comment.