From b3e6b81e43b9eceaf65a7a17f591d470fc345faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 5 Feb 2023 16:59:23 +0100 Subject: [PATCH 1/6] Implement delayed depth readbacks for Vulkan only --- Common/GPU/Vulkan/VulkanFrameData.cpp | 13 +++ Common/GPU/Vulkan/VulkanFrameData.h | 25 ++++- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 115 ++++++++++++++-------- Common/GPU/Vulkan/VulkanQueueRunner.h | 13 +-- Common/GPU/Vulkan/VulkanRenderManager.cpp | 10 +- GPU/Common/FramebufferManagerCommon.cpp | 10 +- GPU/GPU.h | 2 + GPU/GPUCommon.cpp | 3 +- 8 files changed, 136 insertions(+), 55 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp index 192fe1f28ac8..90d2c434839c 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.cpp +++ b/Common/GPU/Vulkan/VulkanFrameData.cpp @@ -4,6 +4,13 @@ #include "Common/Log.h" #include "Common/StringUtils.h" +void CachedReadback::Destroy(VulkanContext *vulkan) { + if (buffer) { + vulkan->Delete().QueueDeleteBufferAllocation(buffer, allocation); + } + bufferSize = 0; +} + void FrameData::Init(VulkanContext *vulkan, int index) { this->index = index; VkDevice device = vulkan->GetDevice(); @@ -48,6 +55,12 @@ void FrameData::Destroy(VulkanContext *vulkan) { vkDestroyCommandPool(device, cmdPoolMain, nullptr); vkDestroyFence(device, fence, nullptr); vkDestroyQueryPool(device, profile.queryPool, nullptr); + + readbacks_.IterateMut([=](const ReadbackKey &key, CachedReadback *value) { + value->Destroy(vulkan); + delete value; + }); + readbacks_.Clear(); } void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) { diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 148dc66338ff..0e1344f24e85 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -6,6 +6,7 @@ #include #include "Common/GPU/Vulkan/VulkanContext.h" +#include "Common/Data/Collections/Hashmaps.h" enum { MAX_TIMESTAMP_QUERIES = 128, @@ -25,6 +26,23 @@ struct QueueProfileContext { double cpuEndTime; }; +class VKRFramebuffer; + +struct ReadbackKey { + const VKRFramebuffer *framebuf; + int width; + int height; +}; + +struct CachedReadback { + VkBuffer buffer; + VmaAllocation allocation; + VkDeviceSize bufferSize; + bool isCoherent; + + void Destroy(VulkanContext *vulkan); +}; + struct FrameDataShared { // Permanent objects VkSemaphore acquireSemaphore = VK_NULL_HANDLE; @@ -77,6 +95,11 @@ struct FrameData { QueueProfileContext profile; bool profilingEnabled_ = false; + // Async readback cache. + DenseHashMap readbacks_; + + FrameData() : readbacks_(8) {} + void Init(VulkanContext *vulkan, int index); void Destroy(VulkanContext *vulkan); @@ -91,5 +114,5 @@ struct FrameData { private: // Metadata for logging etc - int index; + int index = -1; }; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index a28f54ba0a62..3b2bbdd9be28 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -69,7 +69,8 @@ void VulkanQueueRunner::CreateDeviceObjects() { void VulkanQueueRunner::DestroyDeviceObjects() { INFO_LOG(G3D, "VulkanQueueRunner::DestroyDeviceObjects"); - DestroyReadbackBuffer(); + + syncReadback_.Destroy(vulkan_); renderPasses_.IterateMut([&](const RPKey &rpkey, VKRRenderPass *rp) { _assert_(rp); @@ -419,7 +420,7 @@ void VulkanQueueRunner::RunSteps(std::vector &steps, FrameData &frame PerformBlit(step, cmd); break; case VKRStepType::READBACK: - PerformReadback(step, cmd); + PerformReadback(step, cmd, frameData); break; case VKRStepType::READBACK_IMAGE: PerformReadbackImage(step, cmd); @@ -1944,52 +1945,35 @@ void VulkanQueueRunner::SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAs ); } -void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { - if (readbackBuffer_ && requiredSize <= readbackBufferSize_) { +void VulkanQueueRunner::ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize) { + if (readback->buffer && requiredSize <= readback->bufferSize) { return; } - if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); + + if (readback->buffer) { + vulkan_->Delete().QueueDeleteBufferAllocation(readback->buffer, readback->allocation); } - readbackBufferSize_ = requiredSize; + readback->bufferSize = requiredSize; VkDevice device = vulkan_->GetDevice(); VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - buf.size = readbackBufferSize_; + buf.size = readback->bufferSize; buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo{}; allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; VmaAllocationInfo allocInfo{}; - VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readbackBuffer_, &readbackAllocation_, &allocInfo); + VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readback->buffer, &readback->allocation, &allocInfo); _assert_(res == VK_SUCCESS); const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType]; - readbackBufferIsCoherent_ = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; -} - -void VulkanQueueRunner::DestroyReadbackBuffer() { - if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); - } - readbackBufferSize_ = 0; + readback->isCoherent = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; } -void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd) { - ResizeReadbackBuffer(sizeof(uint32_t) * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height); - - VkBufferImageCopy region{}; - region.imageOffset = { step.readback.srcRect.offset.x, step.readback.srcRect.offset.y, 0 }; - region.imageExtent = { step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, 1 }; - region.imageSubresource.aspectMask = step.readback.aspectMask; - region.imageSubresource.layerCount = 1; - region.bufferOffset = 0; - region.bufferRowLength = step.readback.srcRect.extent.width; - region.bufferImageHeight = step.readback.srcRect.extent.height; - +void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd, FrameData &frameData) { VkImage image; VkImageLayout copyLayout; // Special case for backbuffer readbacks. @@ -2023,7 +2007,40 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd copyLayout = srcImage->layout; } - vkCmdCopyImageToBuffer(cmd, image, copyLayout, readbackBuffer_, 1, ®ion); + // TODO: Handle different readback formats! + u32 readbackSizeInBytes = sizeof(uint32_t) * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height; + + CachedReadback *cached = nullptr; + + if (step.readback.delayed) { + ReadbackKey key; + key.framebuf = step.readback.src; + key.width = step.readback.srcRect.extent.width; + key.height = step.readback.srcRect.extent.height; + + // See if there's already a buffer we can reuse + cached = frameData.readbacks_.Get(key); + if (!cached) { + cached = new CachedReadback(); + cached->bufferSize = 0; + frameData.readbacks_.Insert(key, cached); + } + } else { + cached = &syncReadback_; + } + + ResizeReadbackBuffer(cached, readbackSizeInBytes); + + VkBufferImageCopy region{}; + region.imageOffset = { step.readback.srcRect.offset.x, step.readback.srcRect.offset.y, 0 }; + region.imageExtent = { step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, 1 }; + region.imageSubresource.aspectMask = step.readback.aspectMask; + region.imageSubresource.layerCount = 1; + region.bufferOffset = 0; + region.bufferRowLength = step.readback.srcRect.extent.width; + region.bufferImageHeight = step.readback.srcRect.extent.height; + + vkCmdCopyImageToBuffer(cmd, image, copyLayout, cached->buffer, 1, ®ion); // NOTE: Can't read the buffer using the CPU here - need to sync first. @@ -2050,7 +2067,7 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe SetupTransitionToTransferSrc(srcImage, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_); recordBarrier_.Flush(cmd); - ResizeReadbackBuffer(sizeof(uint32_t) * step.readback_image.srcRect.extent.width * step.readback_image.srcRect.extent.height); + ResizeReadbackBuffer(&syncReadback_, sizeof(uint32_t) * step.readback_image.srcRect.extent.width * step.readback_image.srcRect.extent.height); VkBufferImageCopy region{}; region.imageOffset = { step.readback_image.srcRect.offset.x, step.readback_image.srcRect.offset.y, 0 }; @@ -2061,7 +2078,7 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe region.bufferOffset = 0; region.bufferRowLength = step.readback_image.srcRect.extent.width; region.bufferImageHeight = step.readback_image.srcRect.extent.height; - vkCmdCopyImageToBuffer(cmd, step.readback_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, readbackBuffer_, 1, ®ion); + vkCmdCopyImageToBuffer(cmd, step.readback_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, syncReadback_.buffer, 1, ®ion); // Now transfer it back to a texture. TransitionImageLayout2(cmd, step.readback_image.image, 0, 1, 1, // I don't think we have any multilayer cases for regular textures. Above in PerformReadback, though.. @@ -2074,22 +2091,39 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe // Doing that will also act like a heavyweight barrier ensuring that device writes are visible on the host. } -void VulkanQueueRunner::CopyReadbackBuffer(VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) { - if (!readbackBuffer_) - return; // Something has gone really wrong. +bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) { + CachedReadback *readback = &syncReadback_; + + // Look up in readback cache. + if (src) { + ReadbackKey key; + key.framebuf = src; + key.width = width; + key.height = height; + CachedReadback *cached = frameData.readbacks_.Get(key); + if (cached) { + readback = cached; + } else { + // Didn't have a cached image ready yet + return false; + } + } + + if (!readback->buffer) + return false; // Didn't find anything in cache, or something has gone really wrong. // Read back to the requested address in ram from buffer. void *mappedData; const size_t srcPixelSize = DataFormatSizeInBytes(srcFormat); - VkResult res = vmaMapMemory(vulkan_->Allocator(), readbackAllocation_, &mappedData); + VkResult res = vmaMapMemory(vulkan_->Allocator(), readback->allocation, &mappedData); if (res != VK_SUCCESS) { ERROR_LOG(G3D, "CopyReadbackBuffer: vkMapMemory failed! result=%d", (int)res); - return; + return false; } - if (!readbackBufferIsCoherent_) { - vmaInvalidateAllocation(vulkan_->Allocator(), readbackAllocation_, 0, width * height * srcPixelSize); + if (!readback->isCoherent) { + vmaInvalidateAllocation(vulkan_->Allocator(), readback->allocation, 0, width * height * srcPixelSize); } // TODO: Perform these conversions in a compute shader on the GPU. @@ -2116,5 +2150,6 @@ void VulkanQueueRunner::CopyReadbackBuffer(VKRFramebuffer *src, int width, int h _assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat); } - vmaUnmapMemory(vulkan_->Allocator(), readbackAllocation_); + vmaUnmapMemory(vulkan_->Allocator(), readback->allocation); + return true; } diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index 9567a8f243fe..6df7dfc68103 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -199,6 +199,7 @@ struct VKRStep { int aspectMask; VKRFramebuffer *src; VkRect2D srcRect; + bool delayed; } readback; struct { VkImage image; @@ -253,7 +254,7 @@ class VulkanQueueRunner { } // src == 0 means to copy from the sync readback buffer. - void CopyReadbackBuffer(VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels); + bool CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels); VKRRenderPass *GetRenderPass(const RPKey &key); @@ -289,7 +290,7 @@ class VulkanQueueRunner { void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd); void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd); void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd); - void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd); + void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd, FrameData &frameData); void PerformReadbackImage(const VKRStep &pass, VkCommandBuffer cmd); void LogRenderPass(const VKRStep &pass, bool verbose); @@ -298,8 +299,7 @@ class VulkanQueueRunner { void LogReadback(const VKRStep &pass); void LogReadbackImage(const VKRStep &pass); - void ResizeReadbackBuffer(VkDeviceSize requiredSize); - void DestroyReadbackBuffer(); + void ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize); void ApplyMGSHack(std::vector &steps); void ApplySonicHack(std::vector &steps); @@ -325,10 +325,7 @@ class VulkanQueueRunner { // Readback buffer. Currently we only support synchronous readback, so we only really need one. // We size it generously. - VmaAllocation readbackAllocation_ = VK_NULL_HANDLE; - VkBuffer readbackBuffer_ = VK_NULL_HANDLE; - VkDeviceSize readbackBufferSize_ = 0; - bool readbackBufferIsCoherent_ = false; + CachedReadback syncReadback_{}; // TODO: Enable based on compat.ini. uint32_t hacksEnabled_ = 0; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index fbe6b72c7deb..620e857ff1e2 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -929,11 +929,14 @@ bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAs step->readback.src = src; step->readback.srcRect.offset = { x, y }; step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h }; + step->readback.delayed = mode == Draw::ReadbackMode::OLD_DATA_OK; step->dependencies.insert(src); step->tag = tag; steps_.push_back(step); - FlushSync(); + if (mode == Draw::ReadbackMode::BLOCK) { + FlushSync(); + } Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED; if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) { @@ -972,7 +975,8 @@ bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAs } // Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM. - queueRunner_.CopyReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels); + queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], + mode == Draw::ReadbackMode::OLD_DATA_OK ? src : nullptr, w, h, srcFormat, destFormat, pixelStride, pixels); return true; } @@ -992,7 +996,7 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int FlushSync(); // Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM. - queueRunner_.CopyReadbackBuffer(nullptr, w, h, destFormat, destFormat, pixelStride, pixels); + queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels); } static void RemoveDrawCommands(std::vector *cmds) { diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e04cf5d98ab1..e36d838afae7 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2631,7 +2631,9 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false; buffer.Allocate(w, h, GE_FORMAT_8888, flipY); bool retval = draw_->CopyFramebufferToMemory(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetFramebuffer"); - gpuStats.numReadbacks++; + + // Don't need to increment gpu stats for readback count here, this is a debugger-only function. + // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); // We may have blitted to a temp FBO. @@ -2792,7 +2794,11 @@ void FramebufferManagerCommon::ReadbackFramebuffer(VirtualFramebuffer *vfb, int size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format)); NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len); - gpuStats.numReadbacks++; + if (mode == Draw::ReadbackMode::BLOCK) { + gpuStats.numBlockingReadbacks++; + } else { + gpuStats.numReadbacks++; + } } bool FramebufferManagerCommon::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride, Draw::ReadbackMode mode) { diff --git a/GPU/GPU.h b/GPU/GPU.h index 4411733728d5..3c3481c43b71 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -88,6 +88,7 @@ struct GPUStatistics { numFlushes = 0; numTexturesDecoded = 0; numFramebufferEvaluations = 0; + numBlockingReadbacks = 0; numReadbacks = 0; numUploads = 0; numDepal = 0; @@ -118,6 +119,7 @@ struct GPUStatistics { int numShaderSwitches; int numTexturesDecoded; int numFramebufferEvaluations; + int numBlockingReadbacks; int numReadbacks; int numUploads; int numDepal; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fa26a53a1cc5..19a8fece4b39 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -3467,7 +3467,7 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) { "Vertices: %d cached: %d uncached: %d\n" "FBOs active: %d (evaluations: %d)\n" "Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n" - "readbacks %d, uploads %d, depal %d\n" + "readbacks %d (%d non-block), uploads %d, depal %d\n" "Copies: depth %d, color %d, reint %d, blend %d, selftex %d\n" "GPU cycles executed: %d (%f per vertex)\n", gpuStats.msProcessingDisplayLists * 1000.0f, @@ -3485,6 +3485,7 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, gpuStats.numTextureDataBytesHashed / 1024, + gpuStats.numBlockingReadbacks, gpuStats.numReadbacks, gpuStats.numUploads, gpuStats.numDepal, From 5f13bc061a998e1dc1da937a0b6528c0b075f28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 7 Feb 2023 19:28:16 +0100 Subject: [PATCH 2/6] Color readback: Read the previous framebuffer instead of the one being switched to --- GPU/Common/FramebufferManagerCommon.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e36d838afae7..401f79f0f1ac 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1040,17 +1040,19 @@ bool FramebufferManagerCommon::ShouldDownloadFramebufferDepth(const VirtualFrame } void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) { - // TODO: Isn't this wrong? Shouldn't we download the prevVfb if anything? - if (ShouldDownloadFramebufferColor(vfb) && !vfb->memoryUpdated) { - ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK); - vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; - } else { - DownloadFramebufferOnSwitch(prevVfb); - } + if (prevVfb) { + // TODO: Isn't this wrong? Shouldn't we download the prevVfb if anything? + if (ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated) { + ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK); + prevVfb->usageFlags = (prevVfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; + } else { + DownloadFramebufferOnSwitch(prevVfb); + } - if (prevVfb && ShouldDownloadFramebufferDepth(prevVfb)) { - // Allow old data here to avoid blocking, if possible - no uses cases for this depend on data being super fresh. - ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::OLD_DATA_OK); + if (ShouldDownloadFramebufferDepth(prevVfb)) { + // Allow old data here to avoid blocking, if possible - no uses cases for this depend on data being super fresh. + ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::OLD_DATA_OK); + } } textureCache_->ForgetLastTexture(); From 94f51d5d26d58b88e9aff8841d4629869aac1364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 7 Feb 2023 19:32:44 +0100 Subject: [PATCH 3/6] Make the Dangan Ronpa readback async --- GPU/Common/FramebufferManagerCommon.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 401f79f0f1ac..2f4604f9e765 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1043,15 +1043,14 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe if (prevVfb) { // TODO: Isn't this wrong? Shouldn't we download the prevVfb if anything? if (ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated) { - ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK); + ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::OLD_DATA_OK); prevVfb->usageFlags = (prevVfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; } else { DownloadFramebufferOnSwitch(prevVfb); } if (ShouldDownloadFramebufferDepth(prevVfb)) { - // Allow old data here to avoid blocking, if possible - no uses cases for this depend on data being super fresh. - ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::OLD_DATA_OK); + ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::BLOCK); } } From 735cd26db62a77625d4edf782c5a32a6a98c47f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 7 Feb 2023 23:13:12 +0100 Subject: [PATCH 4/6] Document Syphon Filter hack, but don't enable it --- GPU/Common/DepthBufferCommon.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/Common/DepthBufferCommon.cpp b/GPU/Common/DepthBufferCommon.cpp index 96ec6c95bdc6..2e69260410ff 100644 --- a/GPU/Common/DepthBufferCommon.cpp +++ b/GPU/Common/DepthBufferCommon.cpp @@ -218,14 +218,18 @@ bool FramebufferManagerCommon::ReadbackDepthbuffer(Draw::Framebuffer *fbo, int x DepthUB ub{}; + // Setting this to 0.95f eliminates flickering lights with delayed readback in Syphon Filter. + // That's pretty ugly though! But we'll need to do that if we're gonna enable delayed readback in those games. + const float fudgeFactor = 1.0f; + if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) { // Don't scale anything, since we're not using factors outside accurate mode. ub.u_depthFactor[0] = 0.0f; - ub.u_depthFactor[1] = 1.0f; + ub.u_depthFactor[1] = fudgeFactor; } else { const float factor = DepthSliceFactor(); ub.u_depthFactor[0] = -0.5f * (factor - 1.0f) * (1.0f / factor); - ub.u_depthFactor[1] = factor; + ub.u_depthFactor[1] = factor * fudgeFactor; } static constexpr float shifts[] = { 16777215.0f, 16777215.0f / 256.0f, 16777215.0f / 65536.0f, 16777215.0f / 16777216.0f }; memcpy(ub.u_depthShift, shifts, sizeof(shifts)); From 5ad111cfc4043ebc51de298074c3d1fc1941d60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 8 Feb 2023 08:59:47 +0100 Subject: [PATCH 5/6] Return value fix --- Common/GPU/OpenGL/thin3d_gl.cpp | 3 +-- Common/GPU/Vulkan/VulkanRenderManager.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 95f43d9217dd..ca539f1f17e8 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -1001,8 +1001,7 @@ bool OpenGLContext::CopyFramebufferToMemory(Framebuffer *src, int channelBits, i aspect |= GL_DEPTH_BUFFER_BIT; if (channelBits & FB_STENCIL_BIT) aspect |= GL_STENCIL_BUFFER_BIT; - renderManager_.CopyFramebufferToMemory(fb ? fb->framebuffer_ : nullptr, aspect, x, y, w, h, dataFormat, (uint8_t *)pixels, pixelStride, mode, tag); - return true; + return renderManager_.CopyFramebufferToMemory(fb ? fb->framebuffer_ : nullptr, aspect, x, y, w, h, dataFormat, (uint8_t *)pixels, pixelStride, mode, tag); } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 620e857ff1e2..55298b3be452 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -975,9 +975,8 @@ bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAs } // Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM. - queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], + return queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], mode == Draw::ReadbackMode::OLD_DATA_OK ? src : nullptr, w, h, srcFormat, destFormat, pixelStride, pixels); - return true; } void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) { From ab63689cca04aa334f392dc3cd4ec8c86f0bb347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 8 Feb 2023 15:37:06 +0100 Subject: [PATCH 6/6] Remove bad comment --- GPU/Common/FramebufferManagerCommon.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 2f4604f9e765..68bb9019684b 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1041,7 +1041,6 @@ bool FramebufferManagerCommon::ShouldDownloadFramebufferDepth(const VirtualFrame void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) { if (prevVfb) { - // TODO: Isn't this wrong? Shouldn't we download the prevVfb if anything? if (ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated) { ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::OLD_DATA_OK); prevVfb->usageFlags = (prevVfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;