From 4427cb4fc3c8ef10b6b3fdfb7104896fc9261b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 4 Feb 2023 23:40:36 +0100 Subject: [PATCH 1/3] Add parameter to thin3d::CopyFramebufferToMemory to specify blocking mode. Not yet implemented. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 4 ++-- Common/GPU/D3D9/thin3d_d3d9.cpp | 4 ++-- Common/GPU/OpenGL/thin3d_gl.cpp | 4 ++-- Common/GPU/Vulkan/thin3d_vulkan.cpp | 4 ++-- Common/GPU/thin3d.h | 9 ++++++++- GPU/Common/DepthBufferCommon.cpp | 7 +++++-- GPU/Common/FramebufferManagerCommon.cpp | 12 ++++++------ GPU/Common/TextureCacheCommon.cpp | 5 +++-- GPU/GLES/FramebufferManagerGLES.cpp | 2 +- GPU/GLES/StencilBufferGLES.cpp | 4 ++-- 10 files changed, 33 insertions(+), 22 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 29bea57c327c..c0fcb9afbb3c 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -91,7 +91,7 @@ class D3D11DrawContext : public DrawContext { void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override; bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override; - bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override; + bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) override; // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; @@ -1525,7 +1525,7 @@ bool D3D11DrawContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, return false; } -bool D3D11DrawContext::CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int bx, int by, int bw, int bh, Draw::DataFormat destFormat, void *pixels, int pixelStride, const char *tag) { +bool D3D11DrawContext::CopyFramebufferToMemory(Framebuffer *src, int channelBits, int bx, int by, int bw, int bh, Draw::DataFormat destFormat, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) { D3D11Framebuffer *fb = (D3D11Framebuffer *)src; if (fb) { diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index b7e920d6041a..b0116c875a46 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -530,7 +530,7 @@ class D3D9Context : public DrawContext { // Not implemented } bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override; - bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override; + bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) override; // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; @@ -1426,7 +1426,7 @@ bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, (filter == FB_BLIT_LINEAR && channelBits == FB_COLOR_BIT) ? D3DTEXF_LINEAR : D3DTEXF_POINT)); } -bool D3D9Context::CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int bx, int by, int bw, int bh, Draw::DataFormat destFormat, void *pixels, int pixelStride, const char *tag) { +bool D3D9Context::CopyFramebufferToMemory(Framebuffer *src, int channelBits, int bx, int by, int bw, int bh, Draw::DataFormat destFormat, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) { D3D9Framebuffer *fb = (D3D9Framebuffer *)src; if (fb) { diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 480f0cc3bb13..77297a44520d 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -365,7 +365,7 @@ class OpenGLContext : public DrawContext { void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override; bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override; - bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override; + bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) override; // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; @@ -988,7 +988,7 @@ static void LogReadPixelsError(GLenum error) { } #endif -bool OpenGLContext::CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat dataFormat, void *pixels, int pixelStride, const char *tag) { +bool OpenGLContext::CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat dataFormat, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) { if (gl_extensions.IsGLES && (channelBits & FB_COLOR_BIT) == 0) { // Can't readback depth or stencil on GLES. return false; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 7046121f20b0..f65bb1ed2f15 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -414,7 +414,7 @@ class VKContext : public DrawContext { void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override; bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override; - bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override; + bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) override; DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) override; // These functions should be self explanatory. @@ -1632,7 +1632,7 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr return true; } -bool VKContext::CopyFramebufferToMemorySync(Framebuffer *srcfb, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) { +bool VKContext::CopyFramebufferToMemory(Framebuffer *srcfb, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) { VKFramebuffer *src = (VKFramebuffer *)srcfb; int aspectMask = 0; diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index bf10cb38015a..3db2d7b8db3d 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -292,6 +292,11 @@ enum class Event { PRESENTED, }; +enum class ReadbackMode { + BLOCK, + OLD_DATA_OK, // Lets the backend return old results that won't need any waiting to get. +}; + constexpr uint32_t MAX_TEXTURE_SLOTS = 3; struct FramebufferDesc { @@ -693,7 +698,9 @@ class DrawContext { virtual void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) = 0; virtual bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) = 0; - virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) { + + // If the backend doesn't support old data, it's "OK" to block. + virtual bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) { return false; } virtual DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) { diff --git a/GPU/Common/DepthBufferCommon.cpp b/GPU/Common/DepthBufferCommon.cpp index 5d63385e266e..c628b61ee6c5 100644 --- a/GPU/Common/DepthBufferCommon.cpp +++ b/GPU/Common/DepthBufferCommon.cpp @@ -242,16 +242,19 @@ bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, i }; draw_->DrawUP(positions, 3); - draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x * scaleX, y * scaleY, w * scaleX, h * scaleY, DataFormat::R8G8B8A8_UNORM, convBuf_, destW, "ReadbackDepthbufferSync"); + draw_->CopyFramebufferToMemory(blitFBO, FB_COLOR_BIT, + x * scaleX, y * scaleY, w * scaleX, h * scaleY, + DataFormat::R8G8B8A8_UNORM, convBuf_, destW, ReadbackMode::BLOCK, "ReadbackDepthbufferSync"); textureCache_->ForgetLastTexture(); // TODO: Use 4444 (or better, R16_UNORM) so we can copy lines directly (instead of 32 -> 16 on CPU)? format16Bit = true; } else { - draw_->CopyFramebufferToMemorySync(fbo, FB_DEPTH_BIT, x, y, w, h, DataFormat::D32F, convBuf_, w, "ReadbackDepthbufferSync"); + draw_->CopyFramebufferToMemory(fbo, FB_DEPTH_BIT, x, y, w, h, DataFormat::D32F, convBuf_, w, ReadbackMode::BLOCK, "ReadbackDepthbufferSync"); format16Bit = false; } + // TODO: Move this conversion into the backends. if (format16Bit) { // In this case, we used the shader to apply depth scale factors. // This can be SSE'd or NEON'd very efficiently, though ideally we would avoid this conversion by using R16_UNORM for readback. diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index b47a856c51c5..91d3f3c69bce 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2629,7 +2629,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB // TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow? bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false; buffer.Allocate(w, h, GE_FORMAT_8888, flipY); - bool retval = draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, "GetFramebuffer"); + bool retval = draw_->CopyFramebufferToMemory(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetFramebuffer"); gpuStats.numReadbacks++; // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); @@ -2675,7 +2675,7 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY); } // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) - retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); + retval = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetDepthBuffer"); if (!retval) { // Try ReadbackDepthbufferSync, in case GLES. buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY); @@ -2716,7 +2716,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false; // No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway. buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY); - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, "GetStencilbuffer"); + bool retval = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetStencilbuffer"); if (!retval) { // Try ReadbackStencilbufferSync, in case GLES. retval = ReadbackStencilbufferSync(vfb->fbo, 0, 0, w, h, buffer.GetData(), w); @@ -2734,7 +2734,7 @@ bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) { if (fmt != Draw::DataFormat::B8G8R8A8_UNORM) fmt = Draw::DataFormat::R8G8B8A8_UNORM; buffer.Allocate(w, h, fmt == Draw::DataFormat::R8G8B8A8_UNORM ? GPU_DBG_FORMAT_8888 : GPU_DBG_FORMAT_8888_BGRA, false); - bool retval = draw_->CopyFramebufferToMemorySync(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, "GetOutputFramebuffer"); + bool retval = draw_->CopyFramebufferToMemory(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetOutputFramebuffer"); // That may have unbound the framebuffer, rebind to avoid crashes when debugging. RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer"); return retval; @@ -2792,7 +2792,7 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, x * vfb->renderScaleFactor, y * vfb->renderScaleFactor, w * vfb->renderScaleFactor, h * vfb->renderScaleFactor, (uint16_t *)destPtr, stride, w, h); } else { - draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync"); + draw_->CopyFramebufferToMemory(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, Draw::ReadbackMode::BLOCK, "ReadbackFramebufferSync"); } char tag[128]; @@ -2803,7 +2803,7 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, } bool FramebufferManagerCommon::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) { - return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync"); + return draw_->CopyFramebufferToMemory(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, Draw::ReadbackMode::BLOCK, "ReadbackStencilbufferSync"); } void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 0d276ed39a6a..40824ecd49e7 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -28,6 +28,7 @@ #include "Common/StringUtils.h" #include "Common/TimeUtil.h" #include "Common/Math/math_util.h" +#include "Common/GPU/thin3d.h" #include "Core/Config.h" #include "Core/Debugger/MemBlockInfo.h" #include "Core/System.h" @@ -1183,12 +1184,12 @@ bool TextureCacheCommon::GetCurrentFramebufferTextureDebug(GPUDebugBuffer &buffe buffer.Allocate(desiredW, desiredH, GPU_DBG_FORMAT_FLOAT, false); if (w < desiredW || h < desiredH) buffer.ZeroBytes(); - retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::D32F, buffer.GetData(), desiredW, "GetCurrentTextureDebug"); + retval = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::D32F, buffer.GetData(), desiredW, Draw::ReadbackMode::BLOCK, "GetCurrentTextureDebug"); } else { buffer.Allocate(desiredW, desiredH, GPU_DBG_FORMAT_8888, false); if (w < desiredW || h < desiredH) buffer.ZeroBytes(); - retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), desiredW, "GetCurrentTextureDebug"); + retval = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), desiredW, Draw::ReadbackMode::BLOCK, "GetCurrentTextureDebug"); } // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 4c0b7480f9ab..d88800873e0c 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -68,6 +68,6 @@ bool FramebufferManagerGLES::GetOutputFramebuffer(GPUDebugBuffer &buffer) { int w, h; draw_->GetFramebufferDimensions(nullptr, &w, &h); buffer.Allocate(w, h, GPU_DBG_FORMAT_888_RGB, true); - draw_->CopyFramebufferToMemorySync(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8_UNORM, buffer.GetData(), w, "GetOutputFramebuffer"); + draw_->CopyFramebufferToMemory(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetOutputFramebuffer"); return true; } diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index 32cd92497d5d..0097e34c59d1 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -108,7 +108,7 @@ bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, i const bool useColorPath = gl_extensions.IsGLES; if (!useColorPath) { - return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync"); + return draw_->CopyFramebufferToMemory(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, ReadbackMode::BLOCK, "ReadbackStencilbufferSync"); } // Unsupported below GLES 3.1 or without ARB_stencil_texturing. @@ -150,7 +150,7 @@ bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, i }; draw_->DrawUP(positions, 3); - draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync"); + draw_->CopyFramebufferToMemory(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, ReadbackMode::BLOCK, "ReadbackStencilbufferSync"); textureCache_->ForgetLastTexture(); From 7b5def9ff6d50b2044ba387981490046c3c13557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 4 Feb 2023 23:52:38 +0100 Subject: [PATCH 2/3] Use VMA to manage readback memory --- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 65 ++++++------------------- Common/GPU/Vulkan/VulkanQueueRunner.h | 2 +- 2 files changed, 15 insertions(+), 52 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 137703bb0dea..32d4713633ae 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -71,11 +71,8 @@ void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { if (readbackBuffer_ && requiredSize <= readbackBufferSize_) { return; } - if (readbackMemory_) { - vulkan_->Delete().QueueDeleteDeviceMemory(readbackMemory_); - } if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBuffer(readbackBuffer_); + vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); } readbackBufferSize_ = requiredSize; @@ -86,51 +83,21 @@ void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { buf.size = readbackBufferSize_; buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; - VkResult res = vkCreateBuffer(device, &buf, nullptr, &readbackBuffer_); - _assert_(res == VK_SUCCESS); - - VkMemoryRequirements reqs{}; - vkGetBufferMemoryRequirements(device, readbackBuffer_, &reqs); - - VkMemoryAllocateInfo allocInfo{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - allocInfo.allocationSize = reqs.size; + VmaAllocationCreateInfo allocCreateInfo{}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + VmaAllocationInfo allocInfo{}; - // For speedy readbacks, we want the CPU cache to be enabled. However on most hardware we then have to - // sacrifice coherency, which means manual flushing. But try to find such memory first! If no cached - // memory type is available we fall back to just coherent. - const VkFlags desiredTypes[] = { - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - }; - VkFlags successTypeReqs = 0; - for (VkFlags typeReqs : desiredTypes) { - if (vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, typeReqs, &allocInfo.memoryTypeIndex)) { - successTypeReqs = typeReqs; - break; - } - } - _assert_(successTypeReqs != 0); - readbackBufferIsCoherent_ = (successTypeReqs & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; + VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readbackBuffer_, &readbackAllocation_, &allocInfo); + _assert_(res == VK_SUCCESS); - res = vkAllocateMemory(device, &allocInfo, nullptr, &readbackMemory_); - if (res != VK_SUCCESS) { - readbackMemory_ = VK_NULL_HANDLE; - vkDestroyBuffer(device, readbackBuffer_, nullptr); - readbackBuffer_ = VK_NULL_HANDLE; - return; - } - uint32_t offset = 0; - vkBindBufferMemory(device, readbackBuffer_, readbackMemory_, offset); + const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType]; + readbackBufferIsCoherent_ = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; } void VulkanQueueRunner::DestroyDeviceObjects() { INFO_LOG(G3D, "VulkanQueueRunner::DestroyDeviceObjects"); - if (readbackMemory_) { - vulkan_->Delete().QueueDeleteDeviceMemory(readbackMemory_); - } if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBuffer(readbackBuffer_); + vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); } readbackBufferSize_ = 0; @@ -2104,20 +2071,15 @@ void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffe } void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) { - if (!readbackMemory_) + if (!readbackBuffer_) return; // Something has gone really wrong. // Read back to the requested address in ram from buffer. void *mappedData; const size_t srcPixelSize = DataFormatSizeInBytes(srcFormat); - - VkResult res = vkMapMemory(vulkan_->GetDevice(), readbackMemory_, 0, width * height * srcPixelSize, 0, &mappedData); + VkResult res = vmaMapMemory(vulkan_->Allocator(), readbackAllocation_, &mappedData); if (!readbackBufferIsCoherent_) { - VkMappedMemoryRange range{}; - range.memory = readbackMemory_; - range.offset = 0; - range.size = width * height * srcPixelSize; - vkInvalidateMappedMemoryRanges(vulkan_->GetDevice(), 1, &range); + vmaInvalidateAllocation(vulkan_->Allocator(), readbackAllocation_, 0, width * height * srcPixelSize); } if (res != VK_SUCCESS) { @@ -2148,5 +2110,6 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm ERROR_LOG(G3D, "CopyReadbackBuffer: Unknown format"); _assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat); } - vkUnmapMemory(vulkan_->GetDevice(), readbackMemory_); + + vmaUnmapMemory(vulkan_->Allocator(), readbackAllocation_); } diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index da46a07b0aba..235802c982ce 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -323,7 +323,7 @@ class VulkanQueueRunner { // Readback buffer. Currently we only support synchronous readback, so we only really need one. // We size it generously. - VkDeviceMemory readbackMemory_ = VK_NULL_HANDLE; + VmaAllocation readbackAllocation_ = VK_NULL_HANDLE; VkBuffer readbackBuffer_ = VK_NULL_HANDLE; VkDeviceSize readbackBufferSize_ = 0; bool readbackBufferIsCoherent_ = false; From 31df6bf0fe7957313d4d5856cfe457d361f93e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 5 Feb 2023 00:35:30 +0100 Subject: [PATCH 3/3] Move some code around, get rid of some unnecessary fences --- Common/GPU/Vulkan/VulkanFrameData.cpp | 17 +++---- Common/GPU/Vulkan/VulkanFrameData.h | 4 +- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 66 +++++++++++++------------ Common/GPU/Vulkan/VulkanQueueRunner.h | 1 + 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp index fcb91c3cc1c8..192fe1f28ac8 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.cpp +++ b/Common/GPU/Vulkan/VulkanFrameData.cpp @@ -36,11 +36,6 @@ void FrameData::Init(VulkanContext *vulkan, int index) { vulkan->SetDebugName(fence, VK_OBJECT_TYPE_FENCE, StringFromFormat("fence%d", index).c_str()); readyForFence = true; - // This fence is used for synchronizing readbacks. Does not need preinitialization. - // TODO: Put this in frameDataShared, only one is needed. - readbackFence = vulkan->CreateFence(false); - vulkan->SetDebugName(fence, VK_OBJECT_TYPE_FENCE, "readbackFence"); - VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; query_ci.queryCount = MAX_TIMESTAMP_QUERIES; query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP; @@ -52,7 +47,6 @@ void FrameData::Destroy(VulkanContext *vulkan) { vkDestroyCommandPool(device, cmdPoolInit, nullptr); vkDestroyCommandPool(device, cmdPoolMain, nullptr); vkDestroyFence(device, fence, nullptr); - vkDestroyFence(device, readbackFence, nullptr); vkDestroyQueryPool(device, profile.queryPool, nullptr); } @@ -144,7 +138,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame } if ((hasMainCommands || hasPresentCommands) && type == FrameSubmitType::Sync) { - fenceToTrigger = readbackFence; + fenceToTrigger = sharedData.readbackFence; } if (hasMainCommands) { @@ -206,8 +200,8 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame if (type == FrameSubmitType::Sync) { // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback. - vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX); - vkResetFences(vulkan->GetDevice(), 1, &readbackFence); + vkWaitForFences(vulkan->GetDevice(), 1, &sharedData.readbackFence, true, UINT64_MAX); + vkResetFences(vulkan->GetDevice(), 1, &sharedData.readbackFence); syncDone = true; } } @@ -219,10 +213,15 @@ void FrameDataShared::Init(VulkanContext *vulkan) { _dbg_assert_(res == VK_SUCCESS); res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore); _dbg_assert_(res == VK_SUCCESS); + + // This fence is used for synchronizing readbacks. Does not need preinitialization. + readbackFence = vulkan->CreateFence(false); + vulkan->SetDebugName(readbackFence, VK_OBJECT_TYPE_FENCE, "readbackFence"); } void FrameDataShared::Destroy(VulkanContext *vulkan) { VkDevice device = vulkan->GetDevice(); vkDestroySemaphore(device, acquireSemaphore, nullptr); vkDestroySemaphore(device, renderingCompleteSemaphore, nullptr); + vkDestroyFence(device, readbackFence, nullptr); } diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 88d4c185d222..148dc66338ff 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -30,6 +30,9 @@ struct FrameDataShared { VkSemaphore acquireSemaphore = VK_NULL_HANDLE; VkSemaphore renderingCompleteSemaphore = VK_NULL_HANDLE; + // For synchronous readbacks. + VkFence readbackFence = VK_NULL_HANDLE; + void Init(VulkanContext *vulkan); void Destroy(VulkanContext *vulkan); }; @@ -49,7 +52,6 @@ struct FrameData { bool readyForFence = true; VkFence fence = VK_NULL_HANDLE; - VkFence readbackFence = VK_NULL_HANDLE; // Strictly speaking we might only need one global of these. // These are on different threads so need separate pools. VkCommandPool cmdPoolInit = VK_NULL_HANDLE; // Written to from main thread diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 32d4713633ae..3f1d047f85a1 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -67,39 +67,9 @@ void VulkanQueueRunner::CreateDeviceObjects() { #endif } -void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { - if (readbackBuffer_ && requiredSize <= readbackBufferSize_) { - return; - } - if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); - } - - readbackBufferSize_ = requiredSize; - - VkDevice device = vulkan_->GetDevice(); - - VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - buf.size = readbackBufferSize_; - buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - VmaAllocationCreateInfo allocCreateInfo{}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; - VmaAllocationInfo allocInfo{}; - - VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readbackBuffer_, &readbackAllocation_, &allocInfo); - _assert_(res == VK_SUCCESS); - - const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType]; - readbackBufferIsCoherent_ = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; -} - void VulkanQueueRunner::DestroyDeviceObjects() { INFO_LOG(G3D, "VulkanQueueRunner::DestroyDeviceObjects"); - if (readbackBuffer_) { - vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); - } - readbackBufferSize_ = 0; + DestroyReadbackBuffer(); renderPasses_.IterateMut([&](const RPKey &rpkey, VKRRenderPass *rp) { _assert_(rp); @@ -1974,6 +1944,40 @@ void VulkanQueueRunner::SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAs ); } +void VulkanQueueRunner::ResizeReadbackBuffer(VkDeviceSize requiredSize) { + if (readbackBuffer_ && requiredSize <= readbackBufferSize_) { + return; + } + if (readbackBuffer_) { + vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); + } + + readbackBufferSize_ = requiredSize; + + VkDevice device = vulkan_->GetDevice(); + + VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + buf.size = readbackBufferSize_; + buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + VmaAllocationCreateInfo allocCreateInfo{}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + VmaAllocationInfo allocInfo{}; + + VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readbackBuffer_, &readbackAllocation_, &allocInfo); + _assert_(res == VK_SUCCESS); + + const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType]; + readbackBufferIsCoherent_ = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; +} + +void VulkanQueueRunner::DestroyReadbackBuffer() { + if (readbackBuffer_) { + vulkan_->Delete().QueueDeleteBufferAllocation(readbackBuffer_, readbackAllocation_); + } + readbackBufferSize_ = 0; +} + void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd) { ResizeReadbackBuffer(sizeof(uint32_t) * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height); diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index 235802c982ce..bdfe9aad95dd 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -298,6 +298,7 @@ class VulkanQueueRunner { void LogReadbackImage(const VKRStep &pass); void ResizeReadbackBuffer(VkDeviceSize requiredSize); + void DestroyReadbackBuffer(); void ApplyMGSHack(std::vector &steps); void ApplySonicHack(std::vector &steps);