From 60889767bfac869904558a7dfd81b0500835ce40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 25 Mar 2019 21:38:01 +0100 Subject: [PATCH] Vulkan: Request cached memory for readbacks. First try coherent too but also support non-coherent cached memory. Should speed up readbacks slightly. This is just a common sense optimization, I haven't measured it. Writes to coherent non-cached memory are OK due to hardware write combining, but for reads you really want cached to avoid a memory transaction for every single read (instead reading full cache lines). --- ext/native/thin3d/VulkanQueueRunner.cpp | 25 ++++++++++++++++++++++--- ext/native/thin3d/VulkanQueueRunner.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp index 9ca8d67efc3e..b290cf8ba464 100644 --- a/ext/native/thin3d/VulkanQueueRunner.cpp +++ b/ext/native/thin3d/VulkanQueueRunner.cpp @@ -53,9 +53,17 @@ void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { VkMemoryAllocateInfo allocInfo{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; allocInfo.allocationSize = reqs.size; - VkFlags typeReqs = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - bool success = vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, typeReqs, &allocInfo.memoryTypeIndex); - _assert_(success); + // For speedy readbacks, we want the CPU cache to be enabled. However on most hardware we then have to + // sacrifice coherency, which means manual flushing. But try to find such memory first! 
+ VkFlags typeReqs = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + if (vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, typeReqs, &allocInfo.memoryTypeIndex)) { + readbackBufferIsCoherent_ = true; + } else { + typeReqs = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + bool success = vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, typeReqs, &allocInfo.memoryTypeIndex); + _assert_(success); + readbackBufferIsCoherent_ = false; + } VkResult res = vkAllocateMemory(device, &allocInfo, nullptr, &readbackMemory_); if (res != VK_SUCCESS) { @@ -1288,6 +1296,7 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); // NOTE: Can't read the buffer using the CPU here - need to sync first. + // Doing that will also act like a heavyweight barrier ensuring that device writes are visible on the host. } void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) { @@ -1299,10 +1308,20 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm const size_t srcPixelSize = DataFormatSizeInBytes(srcFormat); VkResult res = vkMapMemory(vulkan_->GetDevice(), readbackMemory_, 0, width * height * srcPixelSize, 0, &mappedData); + if (!readbackBufferIsCoherent_) { + VkMappedMemoryRange range{}; + range.memory = readbackMemory_; + range.offset = 0; + range.size = width * height * srcPixelSize; + vkInvalidateMappedMemoryRanges(vulkan_->GetDevice(), 1, &range); + } + if (res != VK_SUCCESS) { ELOG("CopyReadbackBuffer: vkMapMemory failed! result=%d", (int)res); return; } + + // TODO: Perform these conversions in a compute shader on the GPU. 
if (srcFormat == Draw::DataFormat::R8G8B8A8_UNORM) { ConvertFromRGBA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat); } else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) { diff --git a/ext/native/thin3d/VulkanQueueRunner.h b/ext/native/thin3d/VulkanQueueRunner.h index cfcd4c98fc47..911ef1ab0bee 100644 --- a/ext/native/thin3d/VulkanQueueRunner.h +++ b/ext/native/thin3d/VulkanQueueRunner.h @@ -261,6 +261,7 @@ class VulkanQueueRunner { VkDeviceMemory readbackMemory_ = VK_NULL_HANDLE; VkBuffer readbackBuffer_ = VK_NULL_HANDLE; VkDeviceSize readbackBufferSize_ = 0; + bool readbackBufferIsCoherent_ = false; // TODO: Enable based on compat.ini. uint32_t hacksEnabled_ = 0;