diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index bc34a8b08d57..0f56e743daad 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -123,7 +123,8 @@ FramebufferManagerCommon::FramebufferManagerCommon() : usePostShader_(false), postShaderAtOutputResolution_(false), postShaderIsUpscalingFilter_(false), - hackForce04154000Download_(false) { + hackForce04154000Download_(false), + gameUsesSequentialCopies_(false) { UpdateSize(); } @@ -165,21 +166,18 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G } VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) { - VirtualFramebuffer *match = NULL; + VirtualFramebuffer *match = nullptr; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; if (MaskedEqual(v->fb_address, addr)) { // Could check w too but whatever - if (match == NULL || match->last_frame_render < v->last_frame_render) { + if (match == nullptr || match->last_frame_render < v->last_frame_render) { match = v; } } } - if (match != NULL) { - return match; - } - return 0; + return match; } bool FramebufferManagerCommon::MaskedEqual(u32 addr1, u32 addr2) { @@ -730,6 +728,91 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst } } +VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb) { + // For now we'll keep these on the same struct as the ones that can get displayed + // (and blatantly copy work already done above while at it). + VirtualFramebuffer *nvfb = 0; + + // We maintain a separate vector of framebuffer objects for blitting. + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *v = bvfbs_[i]; + if (v->fb_address == vfb->fb_address && v->format == vfb->format) { + if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { + nvfb = v; + v->fb_stride = vfb->fb_stride; + v->width = vfb->width; + v->height = vfb->height; + break; + } + } + } + + // Create a new fbo if none was found for the size + if (!nvfb) { + nvfb = new VirtualFramebuffer(); + nvfb->fbo = nullptr; + nvfb->fb_address = vfb->fb_address; + nvfb->fb_stride = vfb->fb_stride; + nvfb->z_address = vfb->z_address; + nvfb->z_stride = vfb->z_stride; + nvfb->width = vfb->width; + nvfb->height = vfb->height; + nvfb->renderWidth = vfb->bufferWidth; + nvfb->renderHeight = vfb->bufferHeight; + nvfb->bufferWidth = vfb->bufferWidth; + nvfb->bufferHeight = vfb->bufferHeight; + nvfb->format = vfb->format; + nvfb->drawnWidth = vfb->drawnWidth; + nvfb->drawnHeight = vfb->drawnHeight; + nvfb->drawnFormat = vfb->format; + nvfb->colorDepth = vfb->colorDepth; + + if (!CreateDownloadTempBuffer(nvfb)) { + delete nvfb; + return nullptr; + } + + bvfbs_.push_back(nvfb); + } else { + UpdateDownloadTempBuffer(nvfb); + } + + nvfb->usageFlags |= FB_USAGE_RENDERTARGET; + nvfb->last_frame_render = gpuStats.numFlips; + nvfb->dirtyAfterDisplay = true; + + return nvfb; +} + +void FramebufferManagerCommon::OptimizeDownloadRange(VirtualFramebuffer * vfb, int & x, int & y, int & w, int & h) { + if (gameUsesSequentialCopies_) { + // Ignore the x/y/etc., read the entire thing. + x = 0; + y = 0; + w = vfb->width; + h = vfb->height; + } + if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { + // Mark it as fully downloaded until next render to it. + vfb->memoryUpdated = true; + } else { + // Let's try to set the flag eventually, if the game copies a lot. + // Some games copy subranges very frequently. + const static int FREQUENT_SEQUENTIAL_COPIES = 3; + static int frameLastCopy = 0; + static u32 bufferLastCopy = 0; + static int copiesThisFrame = 0; + if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) { + frameLastCopy = gpuStats.numFlips; + bufferLastCopy = vfb->fb_address; + copiesThisFrame = 0; + } + if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) { + gameUsesSequentialCopies_ = true; + } + } +} + bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) { if (!useBufferedRendering_ || updateVRAM_) { return false; diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index badd9f6befbe..e2b4d3187529 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -57,6 +57,7 @@ struct VirtualFramebuffer { int last_frame_render; int last_frame_displayed; int last_frame_clut; + u32 clutUpdatedBytes; bool memoryUpdated; bool depthUpdated; @@ -168,6 +169,7 @@ class FramebufferManagerCommon { void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason); virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0; + virtual void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) = 0; virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0; virtual void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0; @@ -244,11 +246,16 @@ class FramebufferManagerCommon { bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const; + VirtualFramebuffer *FindDownloadTempBuffer(VirtualFramebuffer *vfb); + virtual bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) = 0; + virtual void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) = 0; + void OptimizeDownloadRange(VirtualFramebuffer *vfb, int &x, int &y, int &w, int &h); void UpdateFramebufUsage(VirtualFramebuffer *vfb); void SetColorUpdated(VirtualFramebuffer *dstBuffer, int skipDrawReason) { dstBuffer->memoryUpdated = false; + dstBuffer->clutUpdatedBytes = 0; dstBuffer->dirtyAfterDisplay = true; dstBuffer->drawnWidth = dstBuffer->width; dstBuffer->drawnHeight = dstBuffer->height; @@ -278,9 +285,11 @@ class FramebufferManagerCommon { bool postShaderIsUpscalingFilter_; std::vector vfbs_; + std::vector bvfbs_; // blitting framebuffers (for download) std::set> knownFramebufferRAMCopies_; bool hackForce04154000Download_; + bool gameUsesSequentialCopies_; // Sampled in BeginFrame for safety. float renderWidth_; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 215730f6c657..61305cb8036d 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -213,13 +213,24 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { if (Memory::IsVRAMAddress(clutAddr)) { // Clear the uncached bit, etc. to match framebuffers. const u32 clutFramebufAddr = clutAddr & 0x3FFFFFFF; + const u32 clutFramebufEnd = clutFramebufAddr + loadBytes; + static const u32 MAX_CLUT_OFFSET = 4096; + clutRenderOffset_ = MAX_CLUT_OFFSET; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - if ((framebuffer->fb_address | 0x04000000) == clutFramebufAddr) { + const u32 fb_address = framebuffer->fb_address | 0x04000000; + const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2; + bool match = fb_address + framebuffer->fb_stride * bpp > clutFramebufAddr && fb_address < clutFramebufEnd; + u32 offset = clutFramebufAddr - fb_address; + if (match && offset < clutRenderOffset_) { framebuffer->last_frame_clut = gpuStats.numFlips; framebuffer->usageFlags |= FB_USAGE_CLUT; clutRenderAddress_ = framebuffer->fb_address; + clutRenderOffset_ = offset; + if (offset == 0) { + break; + } } } } @@ -227,32 +238,36 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { // It's possible for a game to (successfully) access outside valid memory. u32 bytes = Memory::ValidSize(clutAddr, loadBytes); if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) { - gpu->PerformMemoryDownload(clutAddr, bytes); - } - -#ifdef _M_SSE - int numBlocks = bytes / 16; - if (bytes == loadBytes) { - const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr); - __m128i *dest = (__m128i *)clutBufRaw_; - for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) { - __m128i data1 = _mm_loadu_si128(source); - __m128i data2 = _mm_loadu_si128(source + 1); - _mm_store_si128(dest, data1); - _mm_store_si128(dest + 1, data2); + DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes); + Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); + if (bytes < loadBytes) { + memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } } else { +#ifdef _M_SSE + int numBlocks = bytes / 16; + if (bytes == loadBytes) { + const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr); + __m128i *dest = (__m128i *)clutBufRaw_; + for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) { + __m128i data1 = _mm_loadu_si128(source); + __m128i data2 = _mm_loadu_si128(source + 1); + _mm_store_si128(dest, data1); + _mm_store_si128(dest + 1, data2); + } + } else { + Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); + if (bytes < loadBytes) { + memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); + } + } +#else Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); if (bytes < loadBytes) { memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } - } -#else - Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); - if (bytes < clutTotalBytes_) { - memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes); - } #endif + } } else { memset(clutBufRaw_, 0x00, loadBytes); } diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 8376093561d0..6df502572ba0 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -139,6 +139,8 @@ class TextureCacheCommon { virtual bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) = 0; virtual void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) = 0; + virtual void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) = 0; + TexCache cache; std::vector fbCache_; @@ -155,6 +157,7 @@ class TextureCacheCommon { u32 clutTotalBytes_; u32 clutMaxBytes_; u32 clutRenderAddress_; + u32 clutRenderOffset_; int standardScaleFactor_; }; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index da5ce0cc94c2..776fec39300c 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -92,8 +92,7 @@ namespace DX9 { convBuf(0), stencilUploadPS_(nullptr), stencilUploadVS_(nullptr), - stencilUploadFailed_(false), - gameUsesSequentialCopies_(false) { + stencilUploadFailed_(false) { } FramebufferManagerDX9::~FramebufferManagerDX9() { @@ -847,116 +846,67 @@ namespace DX9 { } void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { -#if 0 - if (sync) { - PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update - } -#endif - if (vfb) { // We'll pseudo-blit framebuffers here to get a resized version of vfb. - // For now we'll keep these on the same struct as the ones that can get displayed - // (and blatantly copy work already done above while at it). - VirtualFramebuffer *nvfb = 0; - - // We maintain a separate vector of framebuffer objects for blitting. - for (size_t i = 0; i < bvfbs_.size(); ++i) { - VirtualFramebuffer *v = bvfbs_[i]; - if (v->fb_address == vfb->fb_address && v->format == vfb->format) { - if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { - nvfb = v; - v->fb_stride = vfb->fb_stride; - v->width = vfb->width; - v->height = vfb->height; - break; - } - } - } + VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb); + OptimizeDownloadRange(vfb, x, y, w, h); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0); - // Create a new fbo if none was found for the size - if(!nvfb) { - nvfb = new VirtualFramebuffer(); - nvfb->fbo_dx9 = nullptr; - nvfb->fb_address = vfb->fb_address; - nvfb->fb_stride = vfb->fb_stride; - nvfb->z_address = vfb->z_address; - nvfb->z_stride = vfb->z_stride; - nvfb->width = vfb->width; - nvfb->height = vfb->height; - nvfb->renderWidth = vfb->bufferWidth; - nvfb->renderHeight = vfb->bufferHeight; - nvfb->bufferWidth = vfb->bufferWidth; - nvfb->bufferHeight = vfb->bufferHeight; - nvfb->format = vfb->format; - nvfb->drawnWidth = vfb->drawnWidth; - nvfb->drawnHeight = vfb->drawnHeight; - nvfb->drawnFormat = vfb->format; - nvfb->usageFlags = FB_USAGE_RENDERTARGET; - nvfb->dirtyAfterDisplay = true; - - nvfb->colorDepth = FBO_8888; + PackFramebufferDirectx9_(nvfb, x, y, w, h); - textureCache_->ForgetLastTexture(); - nvfb->fbo_dx9 = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); - if (!(nvfb->fbo_dx9)) { - ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); - delete nvfb; - return; - } + textureCache_->ForgetLastTexture(); + RebindFramebuffer(); + } + } - nvfb->last_frame_render = gpuStats.numFlips; - bvfbs_.push_back(nvfb); - fbo_bind_as_render_target(nvfb->fbo_dx9); - ClearBuffer(); - } else { - nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged = true; - nvfb->last_frame_render = gpuStats.numFlips; - nvfb->dirtyAfterDisplay = true; - -#if 0 - if (nvfb->fbo) { - fbo_bind_as_render_target(nvfb->fbo); + void FramebufferManagerDX9::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) { + VirtualFramebuffer *vfb = GetVFBAt(fb_address); + if (vfb && vfb->fb_stride != 0) { + const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2; + int x = 0; + int y = 0; + int pixels = loadBytes / bpp; + // The height will be 1 for each stride or part thereof. + int w = std::min(pixels % vfb->fb_stride, (int)vfb->width); + int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height); + + // No need to download if we already have it. + if (!vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) { + // We intentionally don't call OptimizeDownloadRange() here - we don't want to over download. + // CLUT framebuffers are often incorrectly estimated in size. + if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { + vfb->memoryUpdated = true; } + vfb->clutUpdatedBytes = loadBytes; - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (nvfb->last_frame_render != gpuStats.numFlips) { - ClearBuffer(); - } -#endif - } + // We'll pseudo-blit framebuffers here to get a resized version of vfb. + VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0); - if (gameUsesSequentialCopies_) { - // Ignore the x/y/etc., read the entire thing. - x = 0; - y = 0; - w = vfb->width; - h = vfb->height; - } - if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { - vfb->memoryUpdated = true; - } else { - const static int FREQUENT_SEQUENTIAL_COPIES = 3; - static int frameLastCopy = 0; - static u32 bufferLastCopy = 0; - static int copiesThisFrame = 0; - if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) { - frameLastCopy = gpuStats.numFlips; - bufferLastCopy = vfb->fb_address; - copiesThisFrame = 0; - } - if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) { - gameUsesSequentialCopies_ = true; - } + PackFramebufferDirectx9_(nvfb, x, y, w, h); + + textureCache_->ForgetLastTexture(); + RebindFramebuffer(); } - BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0); + } + } - PackFramebufferDirectx9_(nvfb, x, y, w, h); - RebindFramebuffer(); + bool FramebufferManagerDX9::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + nvfb->colorDepth = FBO_8888; + + nvfb->fbo_dx9 = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); + if (!(nvfb->fbo_dx9)) { + ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); + return false; } + + fbo_bind_as_render_target(nvfb->fbo_dx9); + ClearBuffer(); + return true; + } + + void FramebufferManagerDX9::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + // Nothing to do here. } void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { @@ -1142,10 +1092,6 @@ namespace DX9 { } resized_ = false; } -#if 0 - // We flush to memory last requested framebuffer, if any - PackFramebufferAsync_(NULL); -#endif } void FramebufferManagerDX9::DeviceLost() { diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 31dc9fe320b1..4ae60bf15653 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -73,7 +73,8 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags); - virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; + void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; + void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override; std::vector GetFramebufferList(); @@ -105,6 +106,8 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; + virtual bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + virtual void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; private: void CompileDraw2DProgram(); @@ -135,7 +138,6 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { std::vector extraFBOs_; bool resized_; - bool gameUsesSequentialCopies_; struct TempFBO { FBO_DX9 *fbo; @@ -146,7 +148,6 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { int last_frame_used; }; - std::vector bvfbs_; // blitting FBOs std::map tempFBOs_; std::map offscreenSurfaces_; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index fa31672e22e9..475ecc53540a 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -476,6 +476,7 @@ void DIRECTX9_GPU::CheckGPUFeatures() { features |= GPU_SUPPORTS_BLEND_MINMAX; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; + features |= GPU_PREFER_CPU_DOWNLOAD; if (!PSP_CoreParameter().compat.flags().NoDepthRounding) { features |= GPU_ROUND_DEPTH_TO_16BIT; diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index a5f5933ddb65..dde7b8a48df4 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -804,6 +804,10 @@ void TextureCacheDX9::ApplyTexture() { nextTexture_ = nullptr; } +void TextureCacheDX9::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) { + framebufferManager_->DownloadFramebufferForClut(clutAddr, bytes); +} + class TextureShaderApplierDX9 { public: struct Pos { diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index c3ce5d7bc03d..8a3aefca13b7 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -72,6 +72,9 @@ class TextureCacheDX9 : public TextureCacheCommon { void ApplyTexture(); +protected: + void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override; + private: void Decimate(); // Run this once per frame to get rid of old textures. void DeleteTexture(TexCache::iterator it); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index dd5585222f61..ffabb0791fee 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -291,7 +291,6 @@ FramebufferManager::FramebufferManager() : textureCache_(nullptr), shaderManager_(nullptr), resized_(false), - gameUsesSequentialCopies_(false), pixelBufObj_(nullptr), currentPBO_(0) { @@ -1217,123 +1216,8 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s if (vfb) { // We'll pseudo-blit framebuffers here to get a resized version of vfb. - - // For now we'll keep these on the same struct as the ones that can get displayed - // (and blatantly copy work already done above while at it). - VirtualFramebuffer *nvfb = 0; - - // We maintain a separate vector of framebuffer objects for blitting. - for (size_t i = 0; i < bvfbs_.size(); ++i) { - VirtualFramebuffer *v = bvfbs_[i]; - if (v->fb_address == vfb->fb_address && v->format == vfb->format) { - if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { - nvfb = v; - v->fb_stride = vfb->fb_stride; - v->width = vfb->width; - v->height = vfb->height; - break; - } - } - } - - // Create a new fbo if none was found for the size - if (!nvfb) { - nvfb = new VirtualFramebuffer(); - nvfb->fbo = 0; - nvfb->fb_address = vfb->fb_address; - nvfb->fb_stride = vfb->fb_stride; - nvfb->z_address = vfb->z_address; - nvfb->z_stride = vfb->z_stride; - nvfb->width = vfb->width; - nvfb->height = vfb->height; - nvfb->renderWidth = vfb->bufferWidth; - nvfb->renderHeight = vfb->bufferHeight; - nvfb->bufferWidth = vfb->bufferWidth; - nvfb->bufferHeight = vfb->bufferHeight; - nvfb->format = vfb->format; - nvfb->drawnWidth = vfb->drawnWidth; - nvfb->drawnHeight = vfb->drawnHeight; - nvfb->drawnFormat = vfb->format; - nvfb->usageFlags = FB_USAGE_RENDERTARGET; - nvfb->dirtyAfterDisplay = true; - - // When updating VRAM, it need to be exact format. - switch (vfb->format) { - case GE_FORMAT_4444: - nvfb->colorDepth = FBO_4444; - break; - case GE_FORMAT_5551: - nvfb->colorDepth = FBO_5551; - break; - case GE_FORMAT_565: - nvfb->colorDepth = FBO_565; - break; - case GE_FORMAT_8888: - default: - nvfb->colorDepth = FBO_8888; - break; - } - if (gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) { - nvfb->colorDepth = vfb->colorDepth; - } - - textureCache_->ForgetLastTexture(); - nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, false, (FBOColorDepth)nvfb->colorDepth); - if (!(nvfb->fbo)) { - ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); - delete nvfb; - return; - } - - nvfb->last_frame_render = gpuStats.numFlips; - bvfbs_.push_back(nvfb); - fbo_bind_as_render_target(nvfb->fbo); - ClearBuffer(); - glDisable(GL_DITHER); - } else { - nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - textureCache_->ForgetLastTexture(); - nvfb->last_frame_render = gpuStats.numFlips; - nvfb->dirtyAfterDisplay = true; - - if (gl_extensions.IsGLES) { - if (nvfb->fbo) { - fbo_bind_as_render_target(nvfb->fbo); - } - - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (nvfb->last_frame_render != gpuStats.numFlips) { - ClearBuffer(); - } - } - } - - if (gameUsesSequentialCopies_) { - // Ignore the x/y/etc., read the entire thing. - x = 0; - y = 0; - w = vfb->width; - h = vfb->height; - } - if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { - vfb->memoryUpdated = true; - } else { - const static int FREQUENT_SEQUENTIAL_COPIES = 3; - static int frameLastCopy = 0; - static u32 bufferLastCopy = 0; - static int copiesThisFrame = 0; - if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) { - frameLastCopy = gpuStats.numFlips; - bufferLastCopy = vfb->fb_address; - copiesThisFrame = 0; - } - if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) { - gameUsesSequentialCopies_ = true; - } - } + VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb); + OptimizeDownloadRange(vfb, x, y, w, h); BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0); // PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels @@ -1352,10 +1236,93 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s } } + textureCache_->ForgetLastTexture(); RebindFramebuffer(); } } +void FramebufferManager::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) { + PROFILE_THIS_SCOPE("gpu-readback"); + // Flush async just in case. + PackFramebufferAsync_(nullptr); + + VirtualFramebuffer *vfb = GetVFBAt(fb_address); + if (vfb && vfb->fb_stride != 0) { + const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2; + int x = 0; + int y = 0; + int pixels = loadBytes / bpp; + // The height will be 1 for each stride or part thereof. + int w = std::min(pixels % vfb->fb_stride, (int)vfb->width); + int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height); + + // No need to download if we already have it. + if (!vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) { + // We intentionally don't call OptimizeDownloadRange() here - we don't want to over download. + // CLUT framebuffers are often incorrectly estimated in size. + if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) { + vfb->memoryUpdated = true; + } + vfb->clutUpdatedBytes = loadBytes; + + // We'll pseudo-blit framebuffers here to get a resized version of vfb. + VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb); + BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0); + + PackFramebufferSync_(nvfb, x, y, w, h); + + textureCache_->ForgetLastTexture(); + RebindFramebuffer(); + } + } +} + +bool FramebufferManager::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + // When updating VRAM, it need to be exact format. + if (!gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) { + switch (nvfb->format) { + case GE_FORMAT_4444: + nvfb->colorDepth = FBO_4444; + break; + case GE_FORMAT_5551: + nvfb->colorDepth = FBO_5551; + break; + case GE_FORMAT_565: + nvfb->colorDepth = FBO_565; + break; + case GE_FORMAT_8888: + default: + nvfb->colorDepth = FBO_8888; + break; + } + } + + nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, false, (FBOColorDepth)nvfb->colorDepth); + if (!(nvfb->fbo)) { + ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); + return false; + } + + fbo_bind_as_render_target(nvfb->fbo); + ClearBuffer(); + glDisable(GL_DITHER); + return true; +} + +void FramebufferManager::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + _assert_msg_(G3D, nvfb->fbo, "Expecting a valid nvfb in UpdateDownloadTempBuffer"); + + // Discard the previous contents of this buffer where possible. + if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) { + fbo_bind_as_render_target(nvfb->fbo); + GLenum attachments[3] = { GL_COLOR_ATTACHMENT0, GL_STENCIL_ATTACHMENT, GL_DEPTH_ATTACHMENT }; + glInvalidateFramebuffer(GL_FRAMEBUFFER, 3, attachments); + } else if (gl_extensions.IsGLES) { + fbo_bind_as_render_target(nvfb->fbo); + ClearBuffer(); + } +} + void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. @@ -1795,6 +1762,7 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in bool convert = vfb->format != GE_FORMAT_8888 || UseBGRA8888(); const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; + const int packWidth = x + w < vfb->width ? x + w : vfb->width; if (!convert) { packed = (GLubyte *)Memory::GetPointer(fb_address); @@ -1819,11 +1787,11 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in } int byteOffset = y * vfb->fb_stride * 4; - SafeGLReadPixels(0, y, vfb->fb_stride, h, glfmt, GL_UNSIGNED_BYTE, packed + byteOffset); + SafeGLReadPixels(0, y, h == 1 ? packWidth : vfb->fb_stride, h, glfmt, GL_UNSIGNED_BYTE, packed + byteOffset); if (convert) { int dstByteOffset = y * vfb->fb_stride * dstBpp; - ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, vfb->width, h, vfb->format); + ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, packWidth, h, vfb->format); } } diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 13363b5a2d6d..8f9ab9647892 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -101,7 +101,8 @@ class FramebufferManager : public FramebufferManagerCommon { void BindFramebufferColor(int stage, u32 fbRawAddress, VirtualFramebuffer *framebuffer, int flags); // Reads a rectangular subregion of a framebuffer to the right position in its backing memory. - virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; + void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; + void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override; std::vector GetFramebufferList(); @@ -135,6 +136,8 @@ class FramebufferManager : public FramebufferManagerCommon { virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; + virtual bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + virtual void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; private: void UpdatePostShaderUniforms(int bufferWidth, int bufferHeight, int renderWidth, int renderHeight); @@ -171,14 +174,12 @@ class FramebufferManager : public FramebufferManagerCommon { std::vector extraFBOs_; bool resized_; - bool gameUsesSequentialCopies_; struct TempFBO { FBO *fbo; int last_frame_used; }; - std::vector bvfbs_; // blitting framebuffers (for download) std::map tempFBOs_; // Not used under ES currently. diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 4d7e77ab529b..000f7231e8d0 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -879,6 +879,10 @@ void TextureCache::ApplyTexture() { nextTexture_ = nullptr; } +void TextureCache::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) { + framebufferManager_->DownloadFramebufferForClut(clutAddr, bytes); +} + class TextureShaderApplier { public: struct Pos { diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 903edb184aca..5d04585afe5b 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -87,6 +87,9 @@ class TextureCache : public TextureCacheCommon { void ApplyTexture(); +protected: + void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override; + private: void Decimate(); // Run this once per frame to get rid of old textures. void DeleteTexture(TexCache::iterator it); diff --git a/Windows/GEDebugger/TabState.cpp b/Windows/GEDebugger/TabState.cpp index 47dfaf14fa18..d54473103749 100644 --- a/Windows/GEDebugger/TabState.cpp +++ b/Windows/GEDebugger/TabState.cpp @@ -70,6 +70,7 @@ enum CmdFormatType { CMD_FMT_TEXWRAP, CMD_FMT_TEXFILTER, CMD_FMT_TEXMAPMODE, + CMD_FMT_SHADEMODEL, }; struct TabStateRow { @@ -114,8 +115,7 @@ static const TabStateRow stateLightingRows[] = { { L"Material specular", GE_CMD_MATERIALSPECULAR, CMD_FMT_HEX }, { L"Mat. specular coef", GE_CMD_MATERIALSPECULARCOEF, CMD_FMT_FLOAT24 }, { L"Reverse normals", GE_CMD_REVERSENORMAL, CMD_FMT_FLAG }, - // TODO: Format? - { L"Shade model", GE_CMD_SHADEMODE, CMD_FMT_NUM }, + { L"Shade model", GE_CMD_SHADEMODE, CMD_FMT_SHADEMODEL }, // TODO: Format? { L"Light mode", GE_CMD_LIGHTMODE, CMD_FMT_NUM, GE_CMD_LIGHTINGENABLE }, { L"Light type 0", GE_CMD_LIGHTTYPE0, CMD_FMT_NUM, GE_CMD_LIGHTENABLE0 }, @@ -460,6 +460,16 @@ void FormatStateRow(wchar_t *dest, const TabStateRow &info, u32 value, bool enab } break; + case CMD_FMT_SHADEMODEL: + if (value == 0) { + swprintf(dest, L"flat"); + } else if (value == 1) { + swprintf(dest, L"gouraud"); + } else { + swprintf(dest, L"%06x", value); + } + break; + case CMD_FMT_STENCILOP: { static const char *stencilOps[] = { "KEEP", "ZERO", "REPLACE", "INVERT", "INCREMENT", "DECREMENT" };