diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index d0ae91c2d005..582644a4a658 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -172,24 +172,20 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G } VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) { + addr &= 0x3FFFFFFF; VirtualFramebuffer *match = nullptr; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; - if (MaskedEqual(v->fb_address, addr)) { + if (v->fb_address == addr) { // Could check w too but whatever if (match == nullptr || match->last_frame_render < v->last_frame_render) { match = v; } } } - return match; } -bool FramebufferManagerCommon::MaskedEqual(u32 addr1, u32 addr2) { - return (addr1 & 0x03FFFFFF) == (addr2 & 0x03FFFFFF); -} - u32 FramebufferManagerCommon::FramebufferByteSize(const VirtualFramebuffer *vfb) const { return vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2); } @@ -253,11 +249,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma if (viewport_width != region_width) { // The majority of the time, these are equal. If not, let's check what we know. - const u32 fb_normalized_address = fb_address | 0x44000000; u32 nearest_address = 0xFFFFFFFF; for (size_t i = 0; i < vfbs_.size(); ++i) { - const u32 other_address = vfbs_[i]->fb_address | 0x44000000; - if (other_address > fb_normalized_address && other_address < nearest_address) { + const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF; + if (other_address > fb_address && other_address < nearest_address) { nearest_address = other_address; } } @@ -266,7 +261,7 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma // This catches some cases where we can know this. // Hmm. The problem is that we could only catch it for the first of two buffers... const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2; - int avail_height = (nearest_address - fb_normalized_address) / (fb_stride * bpp); + int avail_height = (nearest_address - fb_address) / (fb_stride * bpp); if (avail_height < drawing_height && avail_height == region_height) { drawing_width = std::min(region_width, fb_stride); drawing_height = avail_height; @@ -282,11 +277,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma } void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) { - params->fb_addr = gstate.getFrameBufAddress(); - params->fb_address = gstate.getFrameBufRawAddress(); + params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000; // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM. params->fb_stride = gstate.FrameBufStride(); - params->z_address = gstate.getDepthBufRawAddress(); + params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000; params->z_stride = gstate.DepthBufStride(); params->fmt = gstate.FrameBufFormat(); @@ -440,9 +434,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame SetColorUpdated(vfb, skipDrawReason); u32 byteSize = FramebufferByteSize(vfb); - u32 fb_address_mem = (params.fb_address & 0x3FFFFFFF) | 0x04000000; - if (Memory::IsVRAMAddress(fb_address_mem) && fb_address_mem + byteSize > framebufRangeEnd_) { - framebufRangeEnd_ = fb_address_mem + byteSize; + // FB heuristics always produce an address in VRAM (this is during rendering) so we don't need to poke in the 0x04000000 flag here. + if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) { + framebufRangeEnd_ = params.fb_address + byteSize; } ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); @@ -456,8 +450,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame currentRenderVfb_ = vfb; if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) { - gpu->PerformMemoryUpload(fb_address_mem, byteSize); - NotifyStencilUpload(fb_address_mem, byteSize, true); + gpu->PerformMemoryUpload(params.fb_address, byteSize); + NotifyStencilUpload(params.fb_address, byteSize, true); // TODO: Is it worth trying to upload the depth buffer? } @@ -680,7 +674,8 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, } void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { - addr &= ~0x40000000; + // Take off the uncached flag from the address. Not to be confused with the start of VRAM. + addr &= 0x3FFFFFFF; // TODO: Could go through all FBOs, but probably not important? // TODO: Could also check for inner changes, but video is most important. bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); @@ -691,7 +686,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { + if (vfb->fb_address == addr) { FlushBeforeCopy(); if (useBufferedRendering_ && vfb->fbo) { @@ -700,7 +695,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) { // If we're not rendering to it, format may be wrong. Use displayFormat_ instead. fmt = displayFormat_; } - DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height); + DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height); SetColorUpdated(vfb, gstate_c.skipDrawReason); } else { INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); @@ -866,11 +861,13 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { VirtualFramebuffer *vfb = GetVFBAt(displayFramebufPtr_); if (!vfb) { - // Let's search for a framebuf within this range. - const u32 addr = (displayFramebufPtr_ & 0x03FFFFFF) | 0x04000000; + // Let's search for a framebuf within this range. Note that we also look for + // "framebuffers" sitting in RAM so we only take off the kernel and uncached bits of the address + // when comparing. + const u32 addr = displayFramebufPtr_ & 0x3FFFFFFF; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; - const u32 v_addr = (v->fb_address & 0x03FFFFFF) | 0x04000000; + const u32 v_addr = v->fb_address & 0x3FFFFFFF; const u32 v_size = FramebufferByteSize(v); if (addr >= v_addr && addr < v_addr + v_size) { const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2; @@ -1241,7 +1238,8 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, continue; } - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + // We only remove the kernel and uncached bits when comparing. + const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF; const u32 vfb_size = FramebufferByteSize(vfb); const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; @@ -1352,7 +1350,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; + const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF; const u32 vfb_size = FramebufferByteSize(vfb); const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; @@ -1915,7 +1913,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, format); return true; } @@ -1969,7 +1967,7 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } @@ -2005,7 +2003,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G if (!vfb) { // If there's no vfb and we're drawing there, must be memory? // TODO: Actually get the stencil. - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, GPU_DBG_FORMAT_8888); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888); return true; } @@ -2057,7 +2055,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int return; } - const u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = vfb->fb_address & 0x3FFFFFFF; Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format); const int dstBpp = (int)DataFormatSizeInBytes(destFormat); diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 4f19debd01f8..9f3f6d7251cd 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -119,7 +119,6 @@ struct VirtualFramebuffer { }; struct FramebufferHeuristicParams { - u32 fb_addr; u32 fb_address; int fb_stride; u32 z_address; @@ -243,10 +242,10 @@ class FramebufferManagerCommon { size_t NumVFBs() const { return vfbs_.size(); } u32 PrevDisplayFramebufAddr() { - return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0; + return prevDisplayFramebuf_ ? prevDisplayFramebuf_->fb_address : 0; } u32 DisplayFramebufAddr() { - return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; + return displayFramebuf_ ? displayFramebuf_->fb_address : 0; } u32 DisplayFramebufStride() { @@ -332,7 +331,6 @@ class FramebufferManagerCommon { void EstimateDrawingSize(u32 fb_address, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int fb_stride, int &drawing_width, int &drawing_height); u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const; - static bool MaskedEqual(u32 addr1, u32 addr2); void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb); void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1409a6b403fe..2da10add47f4 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -635,9 +635,10 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c } void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) { - // Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors. + // Mask to ignore the Z memory mirrors if the address is in VRAM. // These checks are mainly to reduce scanning all textures. - const u32 addr = (address | 0x04000000) & 0x3F9FFFFF; + const u32 mirrorMask = 0x00600000; + const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address; const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2; const u64 cacheKey = (u64)addr << 32; // If it has a clut, those are the low 32 bits, so it'll be inside this range. @@ -749,10 +750,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi AttachedFramebufferInfo fbInfo = { 0 }; - const u64 mirrorMask = 0x00600000; - // Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors. - const u32 addr = (address | 0x04000000) & 0x3FFFFFFF & ~mirrorMask; - const u32 texaddr = ((entry->addr + texaddrOffset) & ~mirrorMask); + const u32 mirrorMask = 0x00600000; + u32 addr = address & 0x3FFFFFFF; + u32 texaddr = entry->addr + texaddrOffset; + if (entry->addr & 0x04000000) { + addr &= ~mirrorMask; + texaddr &= ~mirrorMask; + } const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry->format < 4; const u32 w = 1 << ((entry->dim >> 0) & 0xf); @@ -990,7 +994,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { clutRenderOffset_ = MAX_CLUT_OFFSET; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - const u32 fb_address = framebuffer->fb_address | 0x04000000; + const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF; const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2; u32 offset = clutFramebufAddr - fb_address; diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index bb6ee6b660cd..01bfd06fb63f 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -687,7 +687,7 @@ void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, in return; } - const u32 z_address = (0x04000000) | vfb->z_address; + const u32 z_address = vfb->z_address; // TODO } diff --git a/GPU/D3D11/StencilBufferD3D11.cpp b/GPU/D3D11/StencilBufferD3D11.cpp index f489f5838a83..7c9e791ccab4 100644 --- a/GPU/D3D11/StencilBufferD3D11.cpp +++ b/GPU/D3D11/StencilBufferD3D11.cpp @@ -71,6 +71,7 @@ VS_OUT main(VS_IN In) { // TODO : If SV_StencilRef is available (D3D11.3) then this can be done in a single pass. bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZero) { + addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; } @@ -78,7 +79,7 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ VirtualFramebuffer *dstBuffer = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { + if (vfb->fb_address == addr) { dstBuffer = vfb; } } diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 88bddb3beeb0..75a3beeb3ac5 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -585,7 +585,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { return; } - const u32 fb_address = (0x04000000) | vfb->fb_address; + const u32 fb_address = vfb->fb_address & 0x3FFFFFFF; const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; // We always need to convert from the framebuffer native format. @@ -627,7 +627,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } // We always read the depth buffer in 24_8 format. - const u32 z_address = (0x04000000) | vfb->z_address; + const u32 z_address = vfb->z_address; DEBUG_LOG(FRAMEBUF, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address); @@ -732,7 +732,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, fb_format); + buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, fb_format); return true; } LPDIRECT3DSURFACE9 renderTarget = vfb->fbo ? (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0) : nullptr; @@ -809,7 +809,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } @@ -847,7 +847,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (!vfb) { // If there's no vfb and we're drawing there, must be memory? - buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address | 0x04000000), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT); + buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT); return true; } diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp index 6f51582cf2fc..c6823cba632d 100644 --- a/GPU/Directx9/StencilBufferDX9.cpp +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -65,6 +65,7 @@ static const char *stencil_vs = "}\n"; bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { + addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; } @@ -72,7 +73,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer VirtualFramebuffer *dstBuffer = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { + if (vfb->fb_address == addr) { dstBuffer = vfb; } } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index a2bb10886012..30f71fdd99cb 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -311,7 +311,7 @@ void DrawEngineGLES::DoFlush() { gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); textureNeedsApply = true; } else if (gstate.getTextureAddress(0) == ((gstate.getFrameBufRawAddress() | 0x04000000) & 0x3FFFFFFF)) { - // This catches the case of clearing a texture. + // This catches the case of clearing a texture. (#10957) gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index dee08b457779..17d7469dfa1f 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -62,6 +62,7 @@ static const char *stencil_vs = "}\n"; bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZero) { + addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; } @@ -69,7 +70,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe VirtualFramebuffer *dstBuffer = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { + if (vfb->fb_address == addr) { dstBuffer = vfb; } } diff --git a/GPU/Vulkan/StencilBufferVulkan.cpp b/GPU/Vulkan/StencilBufferVulkan.cpp index cf046fe43ef3..bbf4a6abef2b 100644 --- a/GPU/Vulkan/StencilBufferVulkan.cpp +++ b/GPU/Vulkan/StencilBufferVulkan.cpp @@ -97,6 +97,7 @@ void main() { // messing about with bitplane textures and the like. Or actually, maybe not... Let's start with // the traditional approach. bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, bool skipZero) { + addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; } @@ -104,7 +105,7 @@ bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, bool skip VirtualFramebuffer *dstBuffer = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { + if (vfb->fb_address == addr) { dstBuffer = vfb; } }