From 65843152b5770b5f29b345aea0aece525af3ff94 Mon Sep 17 00:00:00 2001 From: Bradley Austin Davis Date: Mon, 5 Feb 2024 13:26:02 -0800 Subject: [PATCH] fixup! fixup! fixup! Fix barrier issues in computecloth --- examples/computecloth/computecloth.cpp | 138 ++++++++++++------------- 1 file changed, 67 insertions(+), 71 deletions(-) diff --git a/examples/computecloth/computecloth.cpp b/examples/computecloth/computecloth.cpp index e0a74b776..f26bffcc6 100644 --- a/examples/computecloth/computecloth.cpp +++ b/examples/computecloth/computecloth.cpp @@ -38,6 +38,7 @@ class VulkanExample : public VulkanExampleBase struct Cloth { glm::uvec2 gridsize{ 60, 60 }; glm::vec2 size{ 5.0f, 5.0f }; + uint32_t numParticles{ gridsize.x * gridsize.y }; } cloth; // We put the resource "types" into structs to make this sample easier to understand @@ -370,10 +371,8 @@ class VulkanExample : public VulkanExampleBase } // Setup and fill the shader storage buffers containing the particles - // These buffers are used as shader storage buffers in the compute shader (to update them) and as vertex input in the vertex shader (to display them) - void prepareStorageBuffers() - { - std::vector particleBuffer(cloth.gridsize.x * cloth.gridsize.y); + std::vector populateParticleBuffer() { + std::vector particleBuffer{ cloth.numParticles }; float dx = cloth.size.x / (cloth.gridsize.x - 1); float dy = cloth.size.y / (cloth.gridsize.y - 1); @@ -389,21 +388,14 @@ class VulkanExample : public VulkanExampleBase particleBuffer[i + j * cloth.gridsize.y].uv = glm::vec4(1.0f - du * i, dv * j, 0.0f, 0.0f); } } + return particleBuffer; + } - VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle); - - // Staging - // SSBO won't be changed on the host after upload so copy to device local memory - - vks::Buffer stagingBuffer; - - vulkanDevice->createBuffer( - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - &stagingBuffer, - storageBufferSize, - particleBuffer.data()); - + // Allocate the shader storage buffers containing the particles (but do not populate them yet) + // The "input" buffer is used on the compute queue only, while the "output" buffer is + // used as shader storage buffers in the compute shader (to update them) and as vertex input in the vertex shader (to display them) + void prepareStorageBuffers() { + VkDeviceSize storageBufferSize = cloth.gridsize.x * cloth.gridsize.y * sizeof(Particle); // storageBuffers.input is used as an SSBO and only on the compute queue as an intermediate location for iterative computation vulkanDevice->createBuffer( VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, @@ -418,37 +410,64 @@ class VulkanExample : public VulkanExampleBase &storageBuffers.output, storageBufferSize); - // Copy from staging buffer - VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); - VkBufferCopy copyRegion = {}; - copyRegion.size = storageBufferSize; - vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.input.buffer, 1, ©Region); - vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.output.buffer, 1, ©Region); - - // Add an initial release barrier to the graphics queue, - // so that when the compute command buffer executes for the first time - // it doesn't complain about a lack of a corresponding "release" to its "acquire" if (dedicatedComputeQueue) { - std::array bufferBarriers; - bufferBarriers[0] = vks::initializers::bufferMemoryBarrier(); - bufferBarriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - bufferBarriers[0].dstAccessMask = 0; - bufferBarriers[0].srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - bufferBarriers[0].dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - bufferBarriers[0].size = VK_WHOLE_SIZE; - bufferBarriers[0].buffer = storageBuffers.input.buffer; - bufferBarriers[1] = bufferBarriers[0]; - bufferBarriers[1].buffer = storageBuffers.output.buffer; + VkCommandBuffer releaseCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); - vkCmdPipelineBarrier(copyCmd, - VK_PIPELINE_STAGE_TRANSFER_BIT, + VkBufferMemoryBarrier bufferBarrier; + bufferBarrier = vks::initializers::bufferMemoryBarrier(); + bufferBarrier.srcAccessMask = 0; + bufferBarrier.dstAccessMask = 0; + bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.size = VK_WHOLE_SIZE; + bufferBarrier.buffer = storageBuffers.output.buffer; + // This implicitly takes ownership of the output buffer by the graphics queue and sends a release barrier that + // will pair up with the first compute frame acquire barrier + vkCmdPipelineBarrier( + releaseCmd, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_FLAGS_NONE, 0, nullptr, - 2, bufferBarriers.data(), + 1, &bufferBarrier, 0, nullptr); + vulkanDevice->flushCommandBuffer(releaseCmd, queue, true); } - vulkanDevice->flushCommandBuffer(copyCmd, queue, true); + + // Two indices per face plus a restart primitive at the end + uint32_t indexRowCount = (cloth.gridsize.x * 2) + 1; + uint32_t indexBufferSize = indexRowCount * (cloth.gridsize.y - 1) * sizeof(uint32_t); + + vulkanDevice->createBuffer( + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + &graphics.indices, + indexBufferSize); + } + + void populateBuffers() { + std::vector particleBuffer = populateParticleBuffer(); + + // Staging + // SSBO won't be changed on the host after upload so copy to device local memory + assert(storageBuffers.input.size == storageBuffers.output.size); + VkDeviceSize storageBufferSize = storageBuffers.input.size; + + vks::Buffer stagingBuffer; + + vulkanDevice->createBuffer( + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &stagingBuffer, + storageBufferSize, + particleBuffer.data()); + + // Copy from staging buffer to the input buffer (on the compute queue, so that it takes ownership of the input buffer) + VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true); + VkBufferCopy copyRegion = {}; + copyRegion.size = storageBufferSize; + vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.input.buffer, 1, ©Region); + vulkanDevice->flushCommandBuffer(copyCmd, compute.queue, compute.commandPool, true); stagingBuffer.destroy(); @@ -462,26 +481,22 @@ class VulkanExample : public VulkanExampleBase // Primitive restart (signaled by special value 0xFFFFFFFF) indices.push_back(0xFFFFFFFF); } - uint32_t indexBufferSize = static_cast(indices.size()) * sizeof(uint32_t); + + assert(graphics.indices.size == static_cast(indices.size()) * sizeof(uint32_t)); indexCount = static_cast(indices.size()); vulkanDevice->createBuffer( VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &stagingBuffer, - indexBufferSize, + graphics.indices.size, indices.data()); - vulkanDevice->createBuffer( - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &graphics.indices, - indexBufferSize); // Copy from staging buffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); copyRegion = {}; - copyRegion.size = indexBufferSize; + copyRegion.size = graphics.indices.size; vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, graphics.indices.buffer, 1, ©Region); vulkanDevice->flushCommandBuffer(copyCmd, queue, true); @@ -664,27 +679,6 @@ class VulkanExample : public VulkanExampleBase VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.ready)); VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.complete)); - // Acquire the input buffer from the graphics queue explicitly here. This is the only time we need to do this - // as the input buffer doesn't move between queues after initialization. Note, this wouldn't be necessary - // if we created the input buffer on the compute queue in the first place, but the order of initialization - // means both storage buffers are created on the graphics queue. - // There are alternative paths, but all would be complicated by the fact that we need the input buffer - // populated with the initial data, and the output buffer to be initially owned by the graphics queue with a - // pending release barrier to the compute queue, to line up with the first compute dispatch. - VkCommandBuffer acquireCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true); - - VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); - bufferBarrier.srcAccessMask = 0; - bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - bufferBarrier.size = VK_WHOLE_SIZE; - bufferBarrier.buffer = storageBuffers.input.buffer; - // Explicitly acquire the storageBuffer.input buffer (the output buffer will ping-pong back and forth so it's acquired as part of the main compute command buffer) - vkCmdPipelineBarrier(acquireCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); - - vulkanDevice->flushCommandBuffer(acquireCmd, compute.queue, compute.commandPool, true); - // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); } @@ -781,6 +775,8 @@ class VulkanExample : public VulkanExampleBase prepareStorageBuffers(); prepareGraphics(); prepareCompute(); + // Now that the compute queue exists we can populate all the buffers on their corresponding queues + populateBuffers(); prepared = true; }