From 0ed2c0335049522ed64587dcb5db0875ccea8ecd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 11 Jun 2018 14:54:42 -0700 Subject: [PATCH 1/4] GE Debugger: Prevent hang on shutdown. Since we're blocking the Emu thread, we can't use a hook from the Emu thread to wake up. The change to lifecycle callbacks caused this. --- Core/Core.cpp | 16 ++++++++++++---- Core/Core.h | 5 +++++ GPU/Debugger/Stepping.cpp | 10 ++++------ GPU/Debugger/Stepping.h | 2 +- Windows/GEDebugger/GEDebugger.cpp | 2 +- 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/Core/Core.cpp b/Core/Core.cpp index 5e32d29af4bf..bb02a1c08d71 100644 --- a/Core/Core.cpp +++ b/Core/Core.cpp @@ -54,7 +54,8 @@ static std::condition_variable m_InactiveCond; static std::mutex m_hInactiveMutex; static bool singleStepPending = false; static int steppingCounter = 0; -static std::set shutdownFuncs; +static std::set lifecycleFuncs; +static std::set stopFuncs; static bool windowHidden = false; static double lastActivity = 0.0; static double lastKeepAwake = 0.0; @@ -76,17 +77,24 @@ void Core_NotifyActivity() { } void Core_ListenLifecycle(CoreLifecycleFunc func) { - shutdownFuncs.insert(func); + lifecycleFuncs.insert(func); } void Core_NotifyLifecycle(CoreLifecycle stage) { - for (auto it = shutdownFuncs.begin(); it != shutdownFuncs.end(); ++it) { - (*it)(stage); + for (auto func : lifecycleFuncs) { + func(stage); } } +void Core_ListenStopRequest(CoreStopRequestFunc func) { + stopFuncs.insert(func); +} + void Core_Stop() { Core_UpdateState(CORE_POWERDOWN); + for (auto func : stopFuncs) { + func(); + } } bool Core_IsStepping() { diff --git a/Core/Core.h b/Core/Core.h index 0b87f2b393e2..69749f0d7032 100644 --- a/Core/Core.h +++ b/Core/Core.h @@ -51,10 +51,15 @@ enum class CoreLifecycle { MEMORY_REINITED, }; +// Callback is called on the Emu thread. typedef void (* CoreLifecycleFunc)(CoreLifecycle stage); void Core_ListenLifecycle(CoreLifecycleFunc func); void Core_NotifyLifecycle(CoreLifecycle stage); +// Callback is executed on requesting thread. +typedef void (* CoreStopRequestFunc)(); +void Core_ListenStopRequest(CoreStopRequestFunc callback); + bool Core_IsStepping(); bool Core_IsActive(); diff --git a/GPU/Debugger/Stepping.cpp b/GPU/Debugger/Stepping.cpp index ea93778cff42..c765d15b7688 100644 --- a/GPU/Debugger/Stepping.cpp +++ b/GPU/Debugger/Stepping.cpp @@ -197,12 +197,10 @@ void ResumeFromStepping() { SetPauseAction(PAUSE_CONTINUE, false); } -void ForceUnpause(CoreLifecycle stage) { - if (stage == CoreLifecycle::STOPPING) { - SetPauseAction(PAUSE_CONTINUE, false); - actionComplete = true; - actionWait.notify_all(); - } +void ForceUnpause() { + SetPauseAction(PAUSE_CONTINUE, false); + actionComplete = true; + actionWait.notify_all(); } } // namespace diff --git a/GPU/Debugger/Stepping.h b/GPU/Debugger/Stepping.h index 3e9b5086ccbc..d99e8af9040b 100644 --- a/GPU/Debugger/Stepping.h +++ b/GPU/Debugger/Stepping.h @@ -38,5 +38,5 @@ namespace GPUStepping { bool GPU_SetCmdValue(u32 op); void ResumeFromStepping(); - void ForceUnpause(CoreLifecycle stage); + void ForceUnpause(); }; diff --git a/Windows/GEDebugger/GEDebugger.cpp b/Windows/GEDebugger/GEDebugger.cpp index e77238ba3b94..ad68557f6489 100644 --- a/Windows/GEDebugger/GEDebugger.cpp +++ b/Windows/GEDebugger/GEDebugger.cpp @@ -67,7 +67,7 @@ void CGEDebugger::Init() { CGEDebugger::CGEDebugger(HINSTANCE _hInstance, HWND _hParent) : Dialog((LPCSTR)IDD_GEDEBUGGER, _hInstance, _hParent) { GPUBreakpoints::Init(); - Core_ListenLifecycle(ForceUnpause); + Core_ListenStopRequest(ForceUnpause); // minimum size = a little more than the default RECT windowRect; From ccef997a7a1d5126f592d091cc7381f6c0041e09 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 11 Jun 2018 14:56:25 -0700 Subject: [PATCH 2/4] GE Debugger: Fix asserts when reading debug tex. This is unfortunate, but right now BuildTexture() applies the sampling settings which happen within a render pass. So we must have a render pass. --- GPU/GLES/TextureCacheGLES.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 8f6368c66bb6..e390ed322a4a 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -841,6 +841,8 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer. TexCacheEntry *entry = nextTexture_; + // We might need a render pass to set the sampling params, unfortunately. Otherwise BuildTexture may crash. + framebufferManagerGL_->RebindFramebuffer(); ApplyTexture(); // TODO: Centralize? From f7443aaa156c95d9db673892fc7dcb68b188ca5f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 11 Jun 2018 15:06:40 -0700 Subject: [PATCH 3/4] GE Debugger: Use a class for dump execution. --- GPU/Debugger/Record.cpp | 97 +++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index ba986679540c..1d42d937b1f3 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -86,12 +86,35 @@ static std::vector lastRegisters; static std::vector lastTextures; // TODO: Maybe move execute to another file? -static u32 execMemcpyDest; -static u32 execListBuf; -static u32 execListPos; -static u32 execListID; -static const int LIST_BUF_SIZE = 256 * 1024; -static std::vector execListQueue; +class DumpExecute { +public: + ~DumpExecute(); + + bool Run(); + +private: + bool SubmitCmds(void *p, u32 sz); + void SubmitListEnd(); + + void Init(u32 ptr, u32 sz); + void Registers(u32 ptr, u32 sz); + void Vertices(u32 ptr, u32 sz); + void Indices(u32 ptr, u32 sz); + void Clut(u32 ptr, u32 sz); + void TransferSrc(u32 ptr, u32 sz); + void Memset(u32 ptr, u32 sz); + void MemcpyDest(u32 ptr, u32 sz); + void Memcpy(u32 ptr, u32 sz); + void Texture(int level, u32 ptr, u32 sz); + void Display(u32 ptr, u32 sz); + + u32 execMemcpyDest = 0; + u32 execListBuf = 0; + u32 execListPos = 0; + u32 execListID = 0; + const int LIST_BUF_SIZE = 256 * 1024; + std::vector execListQueue; +}; // This class maps pushbuffer (dump data) sections to PSP memory. // Dumps can be larger than available PSP memory, because they include generated data too. @@ -731,7 +754,7 @@ void NotifyFrame() { } } -static bool ExecuteSubmitCmds(void *p, u32 sz) { +bool DumpExecute::SubmitCmds(void *p, u32 sz) { if (execListBuf == 0) { u32 allocSize = LIST_BUF_SIZE; execListBuf = userMemory.Alloc(allocSize, "List buf"); @@ -781,7 +804,7 @@ static bool ExecuteSubmitCmds(void *p, u32 sz) { return true; } -static void ExecuteSubmitListEnd() { +void DumpExecute::SubmitListEnd() { if (execListPos == 0) { return; } @@ -800,16 +823,16 @@ static void ExecuteSubmitListEnd() { CoreTiming::ForceCheck(); } -static void ExecuteInit(u32 ptr, u32 sz) { +void DumpExecute::Init(u32 ptr, u32 sz) { gstate.Restore((u32_le *)(pushbuf.data() + ptr)); gpu->ReapplyGfxState(); } -static void ExecuteRegisters(u32 ptr, u32 sz) { - ExecuteSubmitCmds(pushbuf.data() + ptr, sz); +void DumpExecute::Registers(u32 ptr, u32 sz) { + SubmitCmds(pushbuf.data() + ptr, sz); } -static void ExecuteVertices(u32 ptr, u32 sz) { +void DumpExecute::Vertices(u32 ptr, u32 sz) { u32 psp = execMapping.Map(ptr, sz); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for vertices"); @@ -820,7 +843,7 @@ static void ExecuteVertices(u32 ptr, u32 sz) { execListQueue.push_back((GE_CMD_VADDR << 24) | (psp & 0x00FFFFFF)); } -static void ExecuteIndices(u32 ptr, u32 sz) { +void DumpExecute::Indices(u32 ptr, u32 sz) { u32 psp = execMapping.Map(ptr, sz); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for indices"); @@ -831,7 +854,7 @@ static void ExecuteIndices(u32 ptr, u32 sz) { execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF)); } -static void ExecuteClut(u32 ptr, u32 sz) { +void DumpExecute::Clut(u32 ptr, u32 sz) { u32 psp = execMapping.Map(ptr, sz); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for clut"); @@ -842,7 +865,7 @@ static void ExecuteClut(u32 ptr, u32 sz) { execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF)); } -static void ExecuteTransferSrc(u32 ptr, u32 sz) { +void DumpExecute::TransferSrc(u32 ptr, u32 sz) { u32 psp = execMapping.Map(ptr, sz); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for transfer"); @@ -853,7 +876,7 @@ static void ExecuteTransferSrc(u32 ptr, u32 sz) { execListQueue.push_back(((GE_CMD_TRANSFERSRC) << 24) | (psp & 0x00FFFFFF)); } -static void ExecuteMemset(u32 ptr, u32 sz) { +void DumpExecute::Memset(u32 ptr, u32 sz) { struct MemsetCommand { u32 dest; int value; @@ -867,18 +890,18 @@ static void ExecuteMemset(u32 ptr, u32 sz) { } } -static void ExecuteMemcpyDest(u32 ptr, u32 sz) { +void DumpExecute::MemcpyDest(u32 ptr, u32 sz) { execMemcpyDest = *(const u32 *)(pushbuf.data() + ptr); } -static void ExecuteMemcpy(u32 ptr, u32 sz) { +void DumpExecute::Memcpy(u32 ptr, u32 sz) { if (Memory::IsVRAMAddress(execMemcpyDest)) { Memory::MemcpyUnchecked(execMemcpyDest, pushbuf.data() + ptr, sz); gpu->PerformMemoryUpload(execMemcpyDest, sz); } } -static void ExecuteTexture(int level, u32 ptr, u32 sz) { +void DumpExecute::Texture(int level, u32 ptr, u32 sz) { u32 psp = execMapping.Map(ptr, sz); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for texture"); @@ -889,7 +912,7 @@ static void ExecuteTexture(int level, u32 ptr, u32 sz) { execListQueue.push_back(((GE_CMD_TEXADDR0 + level) << 24) | (psp & 0x00FFFFFF)); } -static void ExecuteDisplay(u32 ptr, u32 sz) { +void DumpExecute::Display(u32 ptr, u32 sz) { struct DisplayBufData { PSPPointer topaddr; u32 linesize, pixelFormat; @@ -901,7 +924,7 @@ static void ExecuteDisplay(u32 ptr, u32 sz) { __DisplaySetFramebuf(disp->topaddr.ptr, disp->linesize, disp->pixelFormat, 0); } -static void ExecuteFree() { +DumpExecute::~DumpExecute() { execMemcpyDest = 0; if (execListBuf) { userMemory.Free(execListBuf); @@ -914,43 +937,43 @@ static void ExecuteFree() { pushbuf.clear(); } -static bool ExecuteCommands() { +bool DumpExecute::Run() { for (const Command &cmd : commands) { switch (cmd.type) { case CommandType::INIT: - ExecuteInit(cmd.ptr, cmd.sz); + Init(cmd.ptr, cmd.sz); break; case CommandType::REGISTERS: - ExecuteRegisters(cmd.ptr, cmd.sz); + Registers(cmd.ptr, cmd.sz); break; case CommandType::VERTICES: - ExecuteVertices(cmd.ptr, cmd.sz); + Vertices(cmd.ptr, cmd.sz); break; case CommandType::INDICES: - ExecuteIndices(cmd.ptr, cmd.sz); + Indices(cmd.ptr, cmd.sz); break; case CommandType::CLUT: - ExecuteClut(cmd.ptr, cmd.sz); + Clut(cmd.ptr, cmd.sz); break; case CommandType::TRANSFERSRC: - ExecuteTransferSrc(cmd.ptr, cmd.sz); + TransferSrc(cmd.ptr, cmd.sz); break; case CommandType::MEMSET: - ExecuteMemset(cmd.ptr, cmd.sz); + Memset(cmd.ptr, cmd.sz); break; case CommandType::MEMCPYDEST: - ExecuteMemcpyDest(cmd.ptr, cmd.sz); + MemcpyDest(cmd.ptr, cmd.sz); break; case CommandType::MEMCPYDATA: - ExecuteMemcpy(cmd.ptr, cmd.sz); + Memcpy(cmd.ptr, cmd.sz); break; case CommandType::TEXTURE0: @@ -961,11 +984,11 @@ static bool ExecuteCommands() { case CommandType::TEXTURE5: case CommandType::TEXTURE6: case CommandType::TEXTURE7: - ExecuteTexture((int)cmd.type - (int)CommandType::TEXTURE0, cmd.ptr, cmd.sz); + Texture((int)cmd.type - (int)CommandType::TEXTURE0, cmd.ptr, cmd.sz); break; case CommandType::DISPLAY: - ExecuteDisplay(cmd.ptr, cmd.sz); + Display(cmd.ptr, cmd.sz); break; default: @@ -974,7 +997,7 @@ static bool ExecuteCommands() { } } - ExecuteSubmitListEnd(); + SubmitListEnd(); return true; } @@ -1028,13 +1051,11 @@ bool RunMountedReplay(const std::string &filename) { if (truncated) { ERROR_LOG(SYSTEM, "Truncated GE dump"); - ExecuteFree(); return false; } - bool success = ExecuteCommands(); - ExecuteFree(); - return success; + DumpExecute executor; + return executor.Run(); } }; From 582bc2d60ae80e6e8523776d99751890d363ed45 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 11 Jun 2018 15:08:27 -0700 Subject: [PATCH 4/4] GE Debugger: Stall less liberally on GE dump exec. This makes it easier to compare performance and flushing bugs. --- GPU/Debugger/Record.cpp | 101 +++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index 1d42d937b1f3..fb130ef34a9b 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include "base/stringutil.h" @@ -93,6 +94,7 @@ class DumpExecute { bool Run(); private: + void SyncStall(); bool SubmitCmds(void *p, u32 sz); void SubmitListEnd(); @@ -114,6 +116,7 @@ class DumpExecute { u32 execListID = 0; const int LIST_BUF_SIZE = 256 * 1024; std::vector execListQueue; + u16 lastBufw_[8]{}; }; // This class maps pushbuffer (dump data) sections to PSP memory. @@ -125,7 +128,7 @@ class DumpExecute { class BufMapping { public: // Returns a pointer to contiguous memory for this access, or else 0 (failure). - u32 Map(u32 bufpos, u32 sz); + u32 Map(u32 bufpos, u32 sz, const std::function &flush); // Clear and reset allocations made. void Reset() { @@ -140,8 +143,8 @@ class BufMapping { } protected: - u32 MapSlab(u32 bufpos); - u32 MapExtra(u32 bufpos, u32 sz); + u32 MapSlab(u32 bufpos, const std::function &flush); + u32 MapExtra(u32 bufpos, u32 sz, const std::function &flush); enum { // These numbers kept low because we only have 24 MB of user memory to map into. @@ -212,20 +215,20 @@ class BufMapping { static BufMapping execMapping; -u32 BufMapping::Map(u32 bufpos, u32 sz) { +u32 BufMapping::Map(u32 bufpos, u32 sz, const std::function &flush) { int slab1 = bufpos / SLAB_SIZE; int slab2 = (bufpos + sz - 1) / SLAB_SIZE; if (slab1 == slab2) { // Doesn't straddle, so we can just map to a slab. - return MapSlab(bufpos); + return MapSlab(bufpos, flush); } else { // We need contiguous, so we'll just allocate separately. - return MapExtra(bufpos, sz); + return MapExtra(bufpos, sz, flush); } } -u32 BufMapping::MapSlab(u32 bufpos) { +u32 BufMapping::MapSlab(u32 bufpos, const std::function &flush) { u32 slab_pos = (bufpos / SLAB_SIZE) * SLAB_SIZE; int best = 0; @@ -239,6 +242,9 @@ u32 BufMapping::MapSlab(u32 bufpos) { } } + // Stall before mapping a new slab. + flush(); + // Okay, we need to allocate. if (!slabs_[best].Setup(slab_pos)) { return 0; @@ -246,7 +252,7 @@ u32 BufMapping::MapSlab(u32 bufpos) { return slabs_[best].Ptr(bufpos); } -u32 BufMapping::MapExtra(u32 bufpos, u32 sz) { +u32 BufMapping::MapExtra(u32 bufpos, u32 sz, const std::function &flush) { for (int i = 0; i < EXTRA_COUNT; ++i) { // Might be likely to reuse larger buffers straddling slabs. if (extra_[i].Matches(bufpos, sz)) { @@ -254,6 +260,9 @@ u32 BufMapping::MapExtra(u32 bufpos, u32 sz) { } } + // Stall first, so we don't stomp existing RAM. + flush(); + int i = extraOffset_; extraOffset_ = (extraOffset_ + 1) % EXTRA_COUNT; @@ -754,6 +763,17 @@ void NotifyFrame() { } } +void DumpExecute::SyncStall() { + gpu->UpdateStall(execListID, execListPos); + s64 listTicks = gpu->GetListTicks(execListID); + if (listTicks != -1) { + currentMIPS->downcount -= listTicks - CoreTiming::GetTicks(); + } + + // Make sure downcount doesn't overflow. + CoreTiming::ForceCheck(); +} + bool DumpExecute::SubmitCmds(void *p, u32 sz) { if (execListBuf == 0) { u32 allocSize = LIST_BUF_SIZE; @@ -784,22 +804,42 @@ bool DumpExecute::SubmitCmds(void *p, u32 sz) { Memory::Write_U32((GE_CMD_JUMP << 24) | (execListBuf & 0x00FFFFFF), execListPos + 4); execListPos = execListBuf; + + // Don't continue until we've stalled. + SyncStall(); } Memory::MemcpyUnchecked(execListPos, execListQueue.data(), pendingSize); execListPos += pendingSize; + u32 writePos = execListPos; Memory::MemcpyUnchecked(execListPos, p, sz); execListPos += sz; - execListQueue.clear(); - gpu->UpdateStall(execListID, execListPos); - s64 listTicks = gpu->GetListTicks(execListID); - if (listTicks != -1) { - currentMIPS->downcount -= listTicks - CoreTiming::GetTicks(); + // TODO: Unfortunate. Maybe Texture commands should contain the bufw instead. + // The goal here is to realistically combine prims in dumps. Stalling for the bufw flushes. + u32_le *ops = (u32_le *)Memory::GetPointer(writePos); + for (u32 i = 0; i < sz / 4; ++i) { + u32 cmd = ops[i] >> 24; + if (cmd >= GE_CMD_TEXBUFWIDTH0 && cmd <= GE_CMD_TEXBUFWIDTH7) { + int level = cmd - GE_CMD_TEXBUFWIDTH0; + u16 bufw = ops[i] & 0xFFFF; + + // NOP the address part of the command to avoid a flush too. + if (bufw == lastBufw_[level]) + ops[i] = GE_CMD_NOP << 24; + else + ops[i] = (gstate.texbufwidth[level] & 0xFFFF0000) | bufw; + lastBufw_[level] = bufw; + } + + // Since we're here anyway, also NOP out texture addresses. + // This makes Step Tex not hit phantom textures. + if (cmd >= GE_CMD_TEXADDR0 && cmd <= GE_CMD_TEXADDR7) { + ops[i] = GE_CMD_NOP << 24; + } } - // Make sure downcount doesn't overflow. - CoreTiming::ForceCheck(); + execListQueue.clear(); return true; } @@ -814,13 +854,8 @@ void DumpExecute::SubmitListEnd() { Memory::Write_U32(GE_CMD_END << 24, execListPos + 4); execListPos += 8; - gpu->UpdateStall(execListID, execListPos); - currentMIPS->downcount -= gpu->GetListTicks(execListID) - CoreTiming::GetTicks(); - + SyncStall(); gpu->ListSync(execListID, 0); - - // Make sure downcount doesn't overflow. - CoreTiming::ForceCheck(); } void DumpExecute::Init(u32 ptr, u32 sz) { @@ -833,7 +868,7 @@ void DumpExecute::Registers(u32 ptr, u32 sz) { } void DumpExecute::Vertices(u32 ptr, u32 sz) { - u32 psp = execMapping.Map(ptr, sz); + u32 psp = execMapping.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for vertices"); return; @@ -844,7 +879,7 @@ void DumpExecute::Vertices(u32 ptr, u32 sz) { } void DumpExecute::Indices(u32 ptr, u32 sz) { - u32 psp = execMapping.Map(ptr, sz); + u32 psp = execMapping.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for indices"); return; @@ -855,7 +890,7 @@ void DumpExecute::Indices(u32 ptr, u32 sz) { } void DumpExecute::Clut(u32 ptr, u32 sz) { - u32 psp = execMapping.Map(ptr, sz); + u32 psp = execMapping.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for clut"); return; @@ -866,12 +901,15 @@ void DumpExecute::Clut(u32 ptr, u32 sz) { } void DumpExecute::TransferSrc(u32 ptr, u32 sz) { - u32 psp = execMapping.Map(ptr, sz); + u32 psp = execMapping.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for transfer"); return; } + // Need to sync in order to access gstate.transfersrcw. + SyncStall(); + execListQueue.push_back((gstate.transfersrcw & 0xFF00FFFF) | ((psp >> 8) & 0x00FF0000)); execListQueue.push_back(((GE_CMD_TRANSFERSRC) << 24) | (psp & 0x00FFFFFF)); } @@ -886,6 +924,7 @@ void DumpExecute::Memset(u32 ptr, u32 sz) { const MemsetCommand *data = (const MemsetCommand *)(pushbuf.data() + ptr); if (Memory::IsVRAMAddress(data->dest)) { + SyncStall(); gpu->PerformMemorySet(data->dest, (u8)data->value, data->sz); } } @@ -896,20 +935,23 @@ void DumpExecute::MemcpyDest(u32 ptr, u32 sz) { void DumpExecute::Memcpy(u32 ptr, u32 sz) { if (Memory::IsVRAMAddress(execMemcpyDest)) { + SyncStall(); Memory::MemcpyUnchecked(execMemcpyDest, pushbuf.data() + ptr, sz); gpu->PerformMemoryUpload(execMemcpyDest, sz); } } void DumpExecute::Texture(int level, u32 ptr, u32 sz) { - u32 psp = execMapping.Map(ptr, sz); + u32 psp = execMapping.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); if (psp == 0) { ERROR_LOG(SYSTEM, "Unable to allocate for texture"); return; } - execListQueue.push_back((gstate.texbufwidth[level] & 0xFF00FFFF) | ((psp >> 8) & 0x00FF0000)); - execListQueue.push_back(((GE_CMD_TEXADDR0 + level) << 24) | (psp & 0x00FFFFFF)); + u32 bufwCmd = GE_CMD_TEXBUFWIDTH0 + level; + u32 addrCmd = GE_CMD_TEXADDR0 + level; + execListQueue.push_back((bufwCmd << 24) | ((psp >> 8) & 0x00FF0000) | lastBufw_[level]); + execListQueue.push_back((addrCmd << 24) | (psp & 0x00FFFFFF)); } void DumpExecute::Display(u32 ptr, u32 sz) { @@ -920,6 +962,9 @@ void DumpExecute::Display(u32 ptr, u32 sz) { DisplayBufData *disp = (DisplayBufData *)(pushbuf.data() + ptr); + // Sync up drawing. + SyncStall(); + __DisplaySetFramebuf(disp->topaddr.ptr, disp->linesize, disp->pixelFormat, 1); __DisplaySetFramebuf(disp->topaddr.ptr, disp->linesize, disp->pixelFormat, 0); }