From 5785cf40ad91e29bc7b5d247f11153afd02eae0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 Aug 2022 22:39:09 +0200 Subject: [PATCH 1/4] Clean up and comment framebuffer struct better, add bind sequence numbers --- GPU/Common/FramebufferManagerCommon.cpp | 30 +++++----- GPU/Common/FramebufferManagerCommon.h | 75 ++++++++++++++++++------- GPU/GLES/DepthBufferGLES.cpp | 2 +- GPU/ge_constants.h | 5 +- 4 files changed, 73 insertions(+), 39 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index afec95d06d0b..005d63a4fad3 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -275,7 +275,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame // As there are no clear "framebuffer width" and "framebuffer height" registers, // we need to infer the size of the current framebuffer somehow. int drawing_width, drawing_height; - EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, 4), drawing_width, drawing_height); + EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, (u16)4), drawing_width, drawing_height); gstate_c.SetCurRTOffset(0, 0); bool vfbFormatChanged = false; @@ -530,6 +530,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged); } + vfb->colorBindSeq = GetBindSeqCount(); + vfb->depthBindSeq = GetBindSeqCount(); + gstate_c.curRTWidth = vfb->width; gstate_c.curRTHeight = vfb->height; gstate_c.curRTRenderWidth = vfb->renderWidth; @@ -692,8 +695,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) { if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) { ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height); - vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR; - vfb->firstFrameSaved = true; + vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; } else { DownloadFramebufferOnSwitch(prevVfb); } @@ -1026,14 +1028,13 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBu } void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) { - if (vfb && vfb->safeWidth > 0 && vfb->safeHeight > 0 && !vfb->firstFrameSaved && !vfb->memoryUpdated) { + if (vfb && vfb->safeWidth > 0 && vfb->safeHeight > 0 && !(vfb->usageFlags & FB_USAGE_FIRST_FRAME_SAVED) && !vfb->memoryUpdated) { // Some games will draw to some memory once, and use it as a render-to-texture later. // To support this, we save the first frame to memory when we have a safe w/h. // Saving each frame would be slow. if (!g_Config.bDisableSlowFramebufEffects && !PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) { ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight); - vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR; - vfb->firstFrameSaved = true; + vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; vfb->safeWidth = 0; vfb->safeHeight = 0; } @@ -1198,8 +1199,7 @@ void FramebufferManagerCommon::DecimateFBOs() { if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) { ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height); - vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR; - vfb->firstFrameSaved = true; + vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR; } // Let's also "decimate" the usageFlags. @@ -1286,7 +1286,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, } if (force1x && g_Config.iInternalResolution != 1) { - vfb->renderScaleFactor = 1.0f; + vfb->renderScaleFactor = 1; vfb->renderWidth = vfb->bufferWidth; vfb->renderHeight = vfb->bufferHeight; } else { @@ -1654,7 +1654,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram nvfb->height = vfb->height; nvfb->renderWidth = vfb->bufferWidth; nvfb->renderHeight = vfb->bufferHeight; - nvfb->renderScaleFactor = 1.0f; // For readbacks we resize to the original size, of course. + nvfb->renderScaleFactor = 1; // For readbacks we resize to the original size, of course. nvfb->bufferWidth = vfb->bufferWidth; nvfb->bufferHeight = vfb->bufferHeight; nvfb->format = vfb->format; @@ -2023,7 +2023,7 @@ void FramebufferManagerCommon::ShowScreenResolution() { // * Video file recording(would probably be great if it was async.) // * Screenshots(benefit slightly from async.) // * Save state screenshots(could probably be async but need to manage the stall.) -bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) { +bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxScaleFactor) { VirtualFramebuffer *vfb = currentRenderVfb_; if (!vfb) { vfb = GetVFBAt(fb_address); @@ -2042,9 +2042,9 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB Draw::Framebuffer *bound = nullptr; if (vfb->fbo) { - if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) { - w = vfb->width * maxRes; - h = vfb->height * maxRes; + if (maxScaleFactor > 0 && vfb->renderWidth > vfb->width * maxScaleFactor) { + w = vfb->width * maxScaleFactor; + h = vfb->height * maxScaleFactor; Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, w, h); VirtualFramebuffer tempVfb = *vfb; @@ -2053,7 +2053,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB tempVfb.bufferHeight = vfb->height; tempVfb.renderWidth = w; tempVfb.renderHeight = h; - tempVfb.renderScaleFactor = (float)maxRes; + tempVfb.renderScaleFactor = maxScaleFactor; BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, "Blit_GetFramebuffer"); bound = tempFBO; diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 54884f35bcd5..a2d395ad4261 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -43,6 +43,7 @@ enum { FB_USAGE_DOWNLOAD = 16, FB_USAGE_DOWNLOAD_CLEAR = 32, FB_USAGE_BLUE_TO_ALPHA = 64, + FB_USAGE_FIRST_FRAME_SAVED = 128, }; enum { @@ -63,48 +64,76 @@ class ShaderWriter; // when such a situation is detected. In order to reliably detect this, we separately track depth buffers, // and they know which color buffer they were used with last. struct VirtualFramebuffer { + Draw::Framebuffer *fbo; + u32 fb_address; u32 z_address; // If 0, it's a "RAM" framebuffer. - int fb_stride; - int z_stride; - - GEBufferFormat format; // virtual, in reality they are all RGBA8888 for better quality but we can reinterpret that as necessary + u16 fb_stride; + u16 z_stride; // width/height: The detected size of the current framebuffer, in original PSP pixels. u16 width; u16 height; // bufferWidth/bufferHeight: The pre-scaling size of the buffer itself. May only be bigger than or equal to width/height. - // Actual physical buffer is this size times the render resolution multiplier. + // In original PSP pixels - actual framebuffer is this size times the render resolution multiplier. // The buffer may be used to render a width or height from 0 to these values without being recreated. u16 bufferWidth; u16 bufferHeight; // renderWidth/renderHeight: The scaled size we render at. May be scaled to render at higher resolutions. - // The physical buffer may be larger than renderWidth/renderHeight. + // These are simply bufferWidth/Height * renderScaleFactor and are thus redundant. u16 renderWidth; u16 renderHeight; - float renderScaleFactor; + // Attempt to keep track of a bounding rectangle of what's been actually drawn. However, right now these are not + // used in a very detailed way (they're only ever 0 or equal to width/height) + u16 drawnWidth; + u16 drawnHeight; + + // The dimensions at which we are confident that we can read back this buffer without stomping on irrelevant memory. + u16 safeWidth; + u16 safeHeight; + + // The scale factor at which we are rendering (to achieve higher resolution). + u8 renderScaleFactor; + + // The original PSP format of the framebuffer. + // In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary + // when we need to interpret the bits directly (depal or buffer aliasing). + GEBufferFormat format; + + // The configured buffer format at the time of the latest/current draw. This will change first, then + // if different we'll "reinterpret" the framebuffer to match 'format' as needed. + GEBufferFormat drawnFormat; u16 usageFlags; + // These are used to track state to try to avoid buffer size shifting back and forth. + // Though that shouldn't really happen, should it, since we always grow, don't shrink? u16 newWidth; u16 newHeight; + // The frame number at which this was last resized. int lastFrameNewSize; - Draw::Framebuffer *fbo; - - u16 drawnWidth; - u16 drawnHeight; - GEBufferFormat drawnFormat; - u16 safeWidth; - u16 safeHeight; + // Tracking for downloads-to-CLUT. + u16 clutUpdatedBytes; + bool memoryUpdated; + // TODO: Fold into usageFlags? bool dirtyAfterDisplay; bool reallyDirtyAfterDisplay; // takes frame skipping into account + // Global sequence numbers for the last time these were bound. + // Not based on frames at all. Can be used to determine new-ness of one framebuffer over another, + // can even be within a frame. + int colorBindSeq; + int depthBindSeq; + + // These are mainly used for garbage collection purposes and similar. + // Cannot be used to determine new-ness against a similar other buffer, since they are + // only at frame granularity. int last_frame_used; int last_frame_attached; int last_frame_render; @@ -113,9 +142,6 @@ struct VirtualFramebuffer { int last_frame_failed; int last_frame_depth_updated; int last_frame_depth_render; - u32 clutUpdatedBytes; - bool memoryUpdated; - bool firstFrameSaved; }; struct TrackedDepthBuffer { @@ -132,9 +158,9 @@ struct TrackedDepthBuffer { struct FramebufferHeuristicParams { u32 fb_address; - int fb_stride; u32 z_address; - int z_stride; + u16 fb_stride; + u16 z_stride; GEBufferFormat fmt; bool isClearingDepth; bool isWritingDepth; @@ -412,6 +438,10 @@ class FramebufferManagerCommon { dstBuffer->reallyDirtyAfterDisplay = true; } + inline int GetBindSeqCount() { + return fbBindSeqCount_++; + } + PresentationCommon *presentation_ = nullptr; Draw::DrawContext *draw_ = nullptr; @@ -427,6 +457,8 @@ class FramebufferManagerCommon { GEBufferFormat displayFormat_ = GE_FORMAT_565; u32 prevDisplayFramebufPtr_ = 0; + int fbBindSeqCount_ = 0; + VirtualFramebuffer *displayFramebuf_ = nullptr; VirtualFramebuffer *prevDisplayFramebuf_ = nullptr; VirtualFramebuffer *prevPrevDisplayFramebuf_ = nullptr; @@ -451,7 +483,8 @@ class FramebufferManagerCommon { // Sampled in BeginFrame/UpdateSize for safety. float renderWidth_ = 0.0f; float renderHeight_ = 0.0f; - float renderScaleFactor_ = 1.0f; + + int renderScaleFactor_ = 1; int pixelWidth_ = 0; int pixelHeight_ = 0; int bloomHack_ = 0; @@ -481,7 +514,7 @@ class FramebufferManagerCommon { Draw::SamplerState *reinterpretSampler_ = nullptr; Draw::Buffer *reinterpretVBuf_ = nullptr; - // Common implementation of stencil buffer upload. Also not 100% optimal, but not perforamnce + // Common implementation of stencil buffer upload. Also not 100% optimal, but not performance // critical either. Draw::Pipeline *stencilUploadPipeline_ = nullptr; Draw::SamplerState *stencilUploadSampler_ = nullptr; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 6334fbd5ee79..e5bcc20bf935 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -84,7 +84,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int // Pixel size always 4 here because we always request float const u32 bufSize = vfb->z_stride * (h - y) * 4; const u32 z_address = vfb->z_address; - const int packWidth = std::min(vfb->z_stride, std::min(x + w, (int)vfb->width)); + const int packWidth = std::min((int)vfb->z_stride, std::min(x + w, (int)vfb->width)); if (!convBuf_ || convBufSize_ < bufSize) { delete[] convBuf_; diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 954934dcab08..78363c387968 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -17,6 +17,8 @@ #pragma once +#include + enum GECommand { GE_CMD_NOP = 0, GE_CMD_VADDR = 0x1, @@ -276,8 +278,7 @@ enum GECommand { GE_CMD_NOP_FF = 0xFF, }; -enum GEBufferFormat -{ +enum GEBufferFormat : uint8_t { GE_FORMAT_565 = 0, GE_FORMAT_5551 = 1, GE_FORMAT_4444 = 2, From 19367dd890a95dd3e85c8dd7c97657c922857a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 Aug 2022 23:00:16 +0200 Subject: [PATCH 2/4] Comment updates --- GPU/Common/FramebufferManagerCommon.cpp | 2 ++ GPU/Common/FramebufferManagerCommon.h | 4 +++- GPU/Directx9/FramebufferManagerDX9.cpp | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 005d63a4fad3..73d55312e532 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -121,6 +121,7 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const { VirtualFramebuffer *v = vfbs_[i]; if (v->fb_address == addr) { // Could check w too but whatever + // NOTE: This gets the OLDEST image at the address - is that good? if (match == nullptr || match->last_frame_render < v->last_frame_render) { match = v; } @@ -2083,6 +2084,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) { VirtualFramebuffer *vfb = currentRenderVfb_; if (!vfb) { + // TODO: This is flawed, as it looks for color buffers at the address, not depth. vfb = GetVFBAt(fb_address); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index a2d395ad4261..d473ab68752a 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -334,8 +334,10 @@ class FramebufferManagerCommon { VirtualFramebuffer *GetCurrentRenderVFB() const { return currentRenderVfb_; } - // TODO: Break out into some form of FBO manager + + // This only checks for the color channel. VirtualFramebuffer *GetVFBAt(u32 addr) const; + VirtualFramebuffer *GetDisplayVFB() const { return GetVFBAt(displayFramebufPtr_); } diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp index 610940584c5b..bba5e670e2e4 100644 --- a/GPU/Directx9/FramebufferManagerDX9.cpp +++ b/GPU/Directx9/FramebufferManagerDX9.cpp @@ -327,6 +327,7 @@ bool FramebufferManagerDX9::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) { VirtualFramebuffer *vfb = currentRenderVfb_; if (!vfb) { + // TODO: This is flawed, as it looks for color buffers at the address, not depth. vfb = GetVFBAt(fb_address); } From a8b1d1191e0a54c5a7205cd6f89c9f6a440813b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 00:07:05 +0200 Subject: [PATCH 3/4] Oops, remove wrong comments --- GPU/Common/FramebufferManagerCommon.cpp | 1 - GPU/Directx9/FramebufferManagerDX9.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 73d55312e532..9a34758bb6dc 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2084,7 +2084,6 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) { VirtualFramebuffer *vfb = currentRenderVfb_; if (!vfb) { - // TODO: This is flawed, as it looks for color buffers at the address, not depth. vfb = GetVFBAt(fb_address); } diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp index bba5e670e2e4..610940584c5b 100644 --- a/GPU/Directx9/FramebufferManagerDX9.cpp +++ b/GPU/Directx9/FramebufferManagerDX9.cpp @@ -327,7 +327,6 @@ bool FramebufferManagerDX9::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) { VirtualFramebuffer *vfb = currentRenderVfb_; if (!vfb) { - // TODO: This is flawed, as it looks for color buffers at the address, not depth. vfb = GetVFBAt(fb_address); } From 078fc881a772321bb3dec5090ba75606a5047d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 10:15:02 +0200 Subject: [PATCH 4/4] Revise comments according to feedback --- GPU/Common/FramebufferManagerCommon.cpp | 5 ++--- GPU/Common/FramebufferManagerCommon.h | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 9a34758bb6dc..78440f5441e8 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -120,9 +120,8 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const { for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; if (v->fb_address == addr) { - // Could check w too but whatever - // NOTE: This gets the OLDEST image at the address - is that good? - if (match == nullptr || match->last_frame_render < v->last_frame_render) { + // Could check w too but whatever (actually, might very well make sense to do so, depending on context). + if (!match || v->last_frame_render > match->last_frame_render) { match = v; } } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index d473ab68752a..d69d99d231df 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -86,8 +86,8 @@ struct VirtualFramebuffer { u16 renderWidth; u16 renderHeight; - // Attempt to keep track of a bounding rectangle of what's been actually drawn. However, right now these are not - // used in a very detailed way (they're only ever 0 or equal to width/height) + // Attempt to keep track of a bounding rectangle of what's been actually drawn. Coarse, but might be smaller + // than width/height if framebuffer has been enlarged. In PSP pixels. u16 drawnWidth; u16 drawnHeight; @@ -110,7 +110,10 @@ struct VirtualFramebuffer { u16 usageFlags; // These are used to track state to try to avoid buffer size shifting back and forth. - // Though that shouldn't really happen, should it, since we always grow, don't shrink? + // You might think that doesn't happen since we mostly grow framebuffers, but we do resize down, + // if the size has shrunk for a while and the framebuffer is also larger than the stride. + // At this point, the "safe" size is probably a lie, and we have had various issues with readbacks, so this resizes down to avoid them. + // An example would be a game that always uses the address 0x00154000 for temp buffers, and uses it for a full-screen effect for 3 frames, then goes back to using it for character shadows or something much smaller. u16 newWidth; u16 newHeight;