Skip to content

Commit

Permalink
GPU: Normalize framebuf addresses.
Browse files Browse the repository at this point in the history
In VRAM, always store without mirror.  In RAM, always store without
cache/kernel bits.
  • Loading branch information
unknownbrackets committed Oct 3, 2022
1 parent 73040eb commit 58a4376
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 33 deletions.
14 changes: 10 additions & 4 deletions Core/Debugger/MemBlockInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,18 @@ void FlushPendingMemInfo() {
pendingNotifyMaxAddr2 = 0;
}

// Reduces an address to the canonical form used as a lookup key here.
// Addresses whose 0x3F000000 bits equal 0x04000000 (treated as VRAM) are
// masked down to 0x041FFFFF, which also drops the mirror bits; every other
// address just has its top two (uncached/kernel) bits cleared.
static inline uint32_t NormalizeAddress(uint32_t addr) {
	const bool inVRAM = (addr & 0x3F000000) == 0x04000000;
	const uint32_t keepMask = inVRAM ? 0x041FFFFF : 0x3FFFFFFF;
	return addr & keepMask;
}

void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *tagStr, size_t strLength) {
if (size == 0) {
return;
}
// Clear the uncached and kernel bits, and VRAM mirror bits.
start &= ~0xC0000000;
start = NormalizeAddress(start);

bool needFlush = false;
// When the setting is off, we skip smaller info to keep things fast.
Expand Down Expand Up @@ -450,7 +456,7 @@ void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const cha
}

std::vector<MemBlockInfo> FindMemInfo(uint32_t start, uint32_t size) {
start &= ~0xC0000000;
start = NormalizeAddress(start);

if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
FlushPendingMemInfo();
Expand All @@ -466,7 +472,7 @@ std::vector<MemBlockInfo> FindMemInfo(uint32_t start, uint32_t size) {
}

std::vector<MemBlockInfo> FindMemInfoByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) {
start &= ~0xC0000000;
start = NormalizeAddress(start);

if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
FlushPendingMemInfo();
Expand All @@ -486,7 +492,7 @@ std::vector<MemBlockInfo> FindMemInfoByFlag(MemBlockFlags flags, uint32_t start,
}

static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) {
start &= ~0xC0000000;
start = NormalizeAddress(start);

if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
FlushPendingMemInfo();
Expand Down
44 changes: 32 additions & 12 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ void FramebufferManagerCommon::BeginFrame() {
}

void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
displayFramebufPtr_ = framebuf;
displayFramebufPtr_ = framebuf & 0x3FFFFFFF;
if (Memory::IsVRAMAddress(displayFramebufPtr_))
displayFramebufPtr_ = framebuf & 0x041FFFFF;
displayStride_ = stride;
displayFormat_ = format;
GPUDebug::NotifyDisplay(framebuf, stride, format);
Expand All @@ -121,6 +123,8 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G

VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
addr &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(addr))
addr &= 0x041FFFFF;
VirtualFramebuffer *match = nullptr;
for (auto vfb : vfbs_) {
if (vfb->fb_address == addr) {
Expand All @@ -134,6 +138,9 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
}

VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride, GEBufferFormat format) const {
addr &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(addr))
addr &= 0x041FFFFF;
VirtualFramebuffer *newest = nullptr;
for (auto vfb : vfbs_) {
if (vfb->fb_address == addr && vfb->fb_stride == stride && vfb->fb_format == format) {
Expand All @@ -150,6 +157,9 @@ VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride,
}

VirtualFramebuffer *FramebufferManagerCommon::ResolveVFB(u32 addr, int stride, GEBufferFormat format) {
addr &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(addr))
addr &= 0x041FFFFF;
// Find the newest one matching addr and stride.
VirtualFramebuffer *newest = nullptr;
for (auto vfb : vfbs_) {
Expand Down Expand Up @@ -235,7 +245,7 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride
// The majority of the time, these are equal. If not, let's check what we know.
u32 nearest_address = 0xFFFFFFFF;
for (auto vfb : vfbs_) {
const u32 other_address = vfb->fb_address & 0x3FFFFFFF;
const u32 other_address = vfb->fb_address;
if (other_address > fb_address && other_address < nearest_address) {
nearest_address = other_address;
}
Expand Down Expand Up @@ -296,10 +306,11 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride
}

void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000; // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
// GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
params->fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
params->fb_stride = gstate.FrameBufStride();

params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000;
params->z_address = gstate.getDepthBufRawAddress() | 0x04000000;
params->z_stride = gstate.DepthBufStride();

if (params->z_address == params->fb_address) {
Expand Down Expand Up @@ -555,7 +566,7 @@ void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
bool newlyUsingDepth = (currentRenderVfb_->usageFlags & FB_USAGE_RENDER_DEPTH) == 0;
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;

uint32_t boundDepthBuffer = gstate.getDepthBufAddress() & 0x3FFFFFFF;
uint32_t boundDepthBuffer = gstate.getDepthBufRawAddress();
if (currentRenderVfb_->z_address != boundDepthBuffer) {
WARN_LOG_N_TIMES(z_reassign, 5, G3D, "Framebuffer at %08x/%d has switched associated depth buffer from %08x to %08x, updating.",
currentRenderVfb_->fb_address, currentRenderVfb_->fb_stride, currentRenderVfb_->z_address, boundDepthBuffer);
Expand Down Expand Up @@ -1034,6 +1045,8 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int stride,
void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
addr &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(addr))
addr &= 0x041FFFFF;
// TODO: Could go through all FBOs, but probably not important?
// TODO: Could also check for inner changes, but video is most important.
// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
Expand Down Expand Up @@ -1354,9 +1367,9 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
// Let's search for a framebuf within this range. Note that we also look for
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
// and uncached bits of the address when comparing.
const u32 addr = fbaddr & 0x3FFFFFFF;
const u32 addr = fbaddr;
for (auto v : vfbs_) {
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
const u32 v_addr = v->fb_address;
const u32 v_size = ColorBufferByteSize(v);

if (v->fb_format != displayFormat_ || v->fb_stride != displayStride_) {
Expand Down Expand Up @@ -1628,6 +1641,10 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,

dst &= 0x3FFFFFFF;
src &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(dst))
dst &= 0x041FFFFF;
if (Memory::IsVRAMAddress(src))
src &= 0x041FFFFF;

// TODO: Merge the below into FindTransferFramebuffer.
// Or at least this should be like the other ones, gathering possible candidates
Expand All @@ -1645,7 +1662,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
}

// We only remove the kernel and uncached bits when comparing.
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_address = vfb->fb_address;
const u32 vfb_size = ColorBufferByteSize(vfb);
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
Expand Down Expand Up @@ -1758,6 +1775,8 @@ std::string BlockTransferRect::ToString() const {
// for depth data yet.
bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_pixels, int x_pixels, int y, int w_pixels, int h, int bpp, bool destination, BlockTransferRect *rect) {
basePtr &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(basePtr))
basePtr &= 0x041FFFFF;
rect->vfb = nullptr;

if (!stride_pixels) {
Expand All @@ -1781,11 +1800,11 @@ bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_p
// Check for easily detected depth copies for logging purposes.
// Depth copies are not that useful though because you manually need to account for swizzle, so
// not sure if games will use them.
if ((vfb->z_address & 0x3FFFFFFF) == basePtr) {
if (vfb->z_address == basePtr) {
WARN_LOG_N_TIMES(z_xfer, 5, G3D, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
}

const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_address = vfb->fb_address;
const u32 vfb_size = ColorBufferByteSize(vfb);

if (basePtr < vfb_address || basePtr >= vfb_address + vfb_size) {
Expand Down Expand Up @@ -1898,7 +1917,8 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
// create a new one each frame.
VirtualFramebuffer *vfb = new VirtualFramebuffer{};
vfb->fbo = nullptr;
vfb->fb_address = fbAddress; // NOTE - not necessarily in VRAM!
uint32_t mask = Memory::IsVRAMAddress(fbAddress) ? 0x041FFFFF : 0x3FFFFFFF;
vfb->fb_address = fbAddress & mask; // NOTE - not necessarily in VRAM!
vfb->fb_stride = stride;
vfb->z_address = 0; // marks that if anyone tries to render to this framebuffer, it should be dropped and recreated.
vfb->z_stride = 0;
Expand Down Expand Up @@ -2543,7 +2563,7 @@ void FramebufferManagerCommon::PackFramebufferSync(VirtualFramebuffer *vfb, int
return;
}

const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 fb_address = vfb->fb_address;

Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
Expand Down
4 changes: 3 additions & 1 deletion GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ class FramebufferManagerCommon {

bool MayIntersectFramebuffer(u32 start) const {
// Clear the cache/kernel bits, and the VRAM mirror bits if in VRAM.
start = start & 0x3FFFFFFF;
start &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(start))
start &= 0x041FFFFF;
// Most games only have two framebuffers at the start.
if (start >= framebufRangeEnd_ || start < PSP_GetVidMemBase()) {
return false;
Expand Down
20 changes: 9 additions & 11 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ bool TextureCacheCommon::GetBestFramebufferCandidate(const TextureDefinition &en
// Should avoid problems when pingponging two nearby buffers, like in Wipeout Pure in #15927.
if (candidate.channel == RASTER_COLOR &&
(candidate.match.yOffset != 0 || candidate.match.xOffset != 0) &&
(candidate.fb->fb_address & 0x1FFFFF) == (gstate.getFrameBufAddress() & 0x1FFFFF)) {
candidate.fb->fb_address == (gstate.getFrameBufRawAddress() | 0x04000000)) {
relevancy -= 2;
}

Expand Down Expand Up @@ -845,10 +845,8 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c
}

void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
const u32 mirrorMask = 0x00600000;
const u32 fb_addr = framebuffer->fb_address;

const u32 z_addr = framebuffer->z_address & ~mirrorMask; // Probably unnecessary.
const u32 z_addr = framebuffer->z_address;

const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
const u32 z_bpp = 2; // No other format exists.
Expand Down Expand Up @@ -931,7 +929,7 @@ bool TextureCacheCommon::MatchFramebuffer(
uint32_t fb_stride_in_bytes = fb_stride * BufferFormatBytesPerPixel(fb_format);
uint32_t tex_stride_in_bytes = entry.bufw * textureBitsPerPixel[entry.format] / 8; // Note, we're looking up bits here so need to divide by 8.

u32 addr = fb_address & 0x3FFFFFFF;
u32 addr = fb_address;
u32 texaddr = entry.addr + texaddrOffset;

bool texInVRAM = Memory::IsVRAMAddress(texaddr);
Expand All @@ -943,10 +941,10 @@ bool TextureCacheCommon::MatchFramebuffer(
}

if (texInVRAM) {
const u32 mirrorMask = 0x00600000;
const u32 mirrorMask = 0x041FFFFF;

addr &= ~mirrorMask;
texaddr &= ~mirrorMask;
addr &= mirrorMask;
texaddr &= mirrorMask;
}

const bool noOffset = texaddr == addr;
Expand Down Expand Up @@ -1205,8 +1203,8 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {

if (Memory::IsValidAddress(clutAddr)) {
if (Memory::IsVRAMAddress(clutAddr)) {
// Clear the uncached bit, etc. to match framebuffers.
const u32 clutLoadAddr = clutAddr & 0x3FFFFFFF;
// Clear the uncached and mirror bits, etc. to match framebuffers.
const u32 clutLoadAddr = clutAddr & 0x041FFFFF;
const u32 clutLoadEnd = clutLoadAddr + loadBytes;
static const u32 MAX_CLUT_OFFSET = 4096;

Expand All @@ -1217,7 +1215,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {

VirtualFramebuffer *chosenFramebuffer = nullptr;
for (VirtualFramebuffer *framebuffer : framebuffers) {
const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
const u32 fb_address = framebuffer->fb_address;
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
int offset = clutLoadAddr - fb_address;

Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/FramebufferManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@
return;
}

const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 fb_address = vfb->fb_address;
const int dstBpp = vfb->fb_format == GE_FORMAT_8888 ? 4 : 2;

// We always need to convert from the framebuffer native format.
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUState.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ struct GPUgstate {
GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); }
int FrameBufStride() const { return fbwidth&0x7FC; }
u32 getDepthBufRawAddress() const { return zbptr & 0x1FFFF0; }
u32 getDepthBufAddress() const { return 0x44000000 | getDepthBufRawAddress(); }
u32 getDepthBufAddress() const { return 0x44600000 | getDepthBufRawAddress(); }
int DepthBufStride() const { return zbwidth&0x7FC; }

// Pixel Pipeline
Expand Down
6 changes: 3 additions & 3 deletions GPU/Software/BinManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ void BinManager::MarkPendingWrites(const Rasterizer::RasterizerState &state) {
DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));

constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000;
constexpr uint32_t mirrorMask = 0x041FFFFF;
const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
if (state.pixelID.depthWrite)
Expand Down Expand Up @@ -538,7 +538,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
if (!Memory::IsVRAMAddress(start))
return false;
// Ignore mirrors for overlap detection.
start &= 0x0FFFFFFF & ~0x00600000;
start &= 0x041FFFFF;

uint32_t size = stride * (h - 1) + w;
for (const auto &range : pendingWrites_) {
Expand Down Expand Up @@ -569,7 +569,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
if (Memory::IsVRAMAddress(start)) {
// Ignore VRAM mirrors.
start &= 0x0FFFFFFF & ~0x00600000;
start &= 0x041FFFFF;
} else {
// Ignore only regular RAM mirrors.
start &= 0x3FFFFFFF;
Expand Down

0 comments on commit 58a4376

Please sign in to comment.