From b4e1f6d902c5ed19f5bab3804e6be4e6b30671e0 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 6 Sep 2022 18:54:52 -0700 Subject: [PATCH 1/8] GPU: Fix crash on overlap copy w/o cur target. --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index d907f1ac2f10..3cbe6543f090 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -810,7 +810,7 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra } } - if (dst != currentRenderVfb_ && tookActions) { + if (currentRenderVfb_ && dst != currentRenderVfb_ && tookActions) { // Will probably just change the name of the current renderpass, since one was started by the reinterpret itself. draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After Reinterpret"); } From d249674858ab20ebb38dc12ff1d4aa13eb83b285 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 5 Sep 2022 22:38:21 -0700 Subject: [PATCH 2/8] softgpu: Fix immediate rendering crash. --- GPU/GPUCommon.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 534a813459b6..fa59e485cc55 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -2410,7 +2410,8 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { void GPUCommon::FlushImm() { SetDrawType(DRAW_PRIM, immPrim_); - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (framebufferManager_) + framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // No idea how many cycles to skip, heh. return; From 531c7e452d82a94050110457932c9d496e948e20 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 5 Sep 2022 22:45:01 -0700 Subject: [PATCH 3/8] GE Debugger: Count imm prims as prims. --- GPU/Debugger/Debugger.cpp | 3 ++- Windows/GEDebugger/TabState.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/GPU/Debugger/Debugger.cpp b/GPU/Debugger/Debugger.cpp index 439c763e8e74..fb88fdca2fba 100644 --- a/GPU/Debugger/Debugger.cpp +++ b/GPU/Debugger/Debugger.cpp @@ -73,6 +73,7 @@ void SetBreakNext(BreakNext next) { GPUBreakpoints::AddCmdBreakpoint(GE_CMD_PRIM, true); GPUBreakpoints::AddCmdBreakpoint(GE_CMD_BEZIER, true); GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true); + GPUBreakpoints::AddCmdBreakpoint(GE_CMD_VAP, true); } else if (next == BreakNext::CURVE) { GPUBreakpoints::AddCmdBreakpoint(GE_CMD_BEZIER, true); GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true); @@ -111,7 +112,7 @@ bool NotifyCommand(u32 pc) { } bool process = true; - if (cmd == GE_CMD_PRIM || cmd == GE_CMD_BEZIER || cmd == GE_CMD_SPLINE) { + if (cmd == GE_CMD_PRIM || cmd == GE_CMD_BEZIER || cmd == GE_CMD_SPLINE || cmd == GE_CMD_VAP) { primsThisFrame++; if (!restrictPrimRanges.empty()) { diff --git a/Windows/GEDebugger/TabState.cpp b/Windows/GEDebugger/TabState.cpp index 5ad864ed3f59..da1baa10bf62 100644 --- a/Windows/GEDebugger/TabState.cpp +++ b/Windows/GEDebugger/TabState.cpp @@ -266,6 +266,14 @@ static const TabStateRow stateSettingsRows[] = { { L"Dither 1", GE_CMD_DITH1, CMD_FMT_HEX, GE_CMD_DITHERENABLE }, { L"Dither 2", GE_CMD_DITH2, CMD_FMT_HEX, GE_CMD_DITHERENABLE }, { L"Dither 3", GE_CMD_DITH3, CMD_FMT_HEX, GE_CMD_DITHERENABLE }, + { L"Imm vertex XY", GE_CMD_VSCX, CMD_FMT_F16_XY, 0, GE_CMD_VSCY }, + { L"Imm vertex Z", GE_CMD_VSCZ, CMD_FMT_HEX }, + { L"Imm vertex tex STQ", GE_CMD_VTCS, CMD_FMT_XYZ, 0, GE_CMD_VTCT, GE_CMD_VTCQ }, + { L"Imm vertex color0", GE_CMD_VCV, CMD_FMT_HEX }, + { L"Imm vertex color1", GE_CMD_VSCV, CMD_FMT_HEX }, + { L"Imm vertex fog", GE_CMD_VFC, CMD_FMT_HEX }, + // TODO: Format? + { L"Imm vertex prim", GE_CMD_VAP, CMD_FMT_HEX }, }; // TODO: Commands not present in the above lists (some because they don't have meaningful values...): From ceb2af369c3c90435bf58e2d8dfe77d58589c817 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 5 Sep 2022 22:45:34 -0700 Subject: [PATCH 4/8] GPU: Handle immediate prims more accurately. This allows for lines, points, textures, and similar things. Also corrects offset handling. Still some flags on VAP that seemingly don't work, and this doesn't consider the texture flag on it. --- GPU/GPUCommon.cpp | 44 ++++++++++++++++++++++++++++++---------- GPU/GPUCommon.h | 4 ++-- GPU/Software/SoftGpu.cpp | 1 + 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fa59e485cc55..a4e765ea036c 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1530,6 +1530,7 @@ void GPUCommon::Execute_End(u32 op, u32 diff) { break; default: + FlushImm(); currentList->subIntrToken = prev & 0xFFFF; UpdateState(GPUSTATE_DONE); // Since we marked done, we have to restore the context now before the next list runs. @@ -1651,6 +1652,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { u32 count = data & 0xFFFF; if (count == 0) return; + FlushImm(); // Upper bits are ignored. GEPrimitiveType prim = static_cast((data >> 16) & 7); @@ -2379,13 +2381,25 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { return; } + int prim = (op >> 8) & 0x7; + if (prim != GE_PRIM_KEEP_PREVIOUS) { + // Flush before changing the prim type. Only continue can be used to continue a prim. + FlushImm(); + } + TransformedVertex &v = immBuffer_[immCount_++]; - // Formula deduced from ThrillVille's clear. - int offsetX = gstate.getOffsetX16(); - int offsetY = gstate.getOffsetY16(); - v.x = ((gstate.imm_vscx & 0xFFFFFF) - offsetX) / 16.0f; - v.y = ((gstate.imm_vscy & 0xFFFFFF) - offsetY) / 16.0f; + // ThrillVille does a clear with this, additional parameters found via tests. + // The current vtype affects how the coordinate is processed. + if (gstate.isModeThrough()) { + v.x = ((int)(gstate.imm_vscx & 0xFFFF) - 0x8000) / 16.0f; + v.y = ((int)(gstate.imm_vscy & 0xFFFF) - 0x8000) / 16.0f; + } else { + int offsetX = gstate.getOffsetX16(); + int offsetY = gstate.getOffsetY16(); + v.x = ((int)(gstate.imm_vscx & 0xFFFF) - offsetX) / 16.0f; + v.y = ((int)(gstate.imm_vscy & 0xFFFF) - offsetY) / 16.0f; + } v.z = gstate.imm_vscz & 0xFFFF; v.pos_w = 1.0f; v.u = getFloat24(gstate.imm_vtcs); @@ -2394,26 +2408,29 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { v.color0_32 = (gstate.imm_cv & 0xFFFFFF) | (gstate.imm_ap << 24); v.fog = 0.0f; // we have no information about the scale here v.color1_32 = gstate.imm_scv & 0xFFFFFF; - int prim = (op >> 8) & 0x7; if (prim != GE_PRIM_KEEP_PREVIOUS) { immPrim_ = (GEPrimitiveType)prim; - } else if (prim == GE_PRIM_KEEP_PREVIOUS && immCount_ == 2) { + } else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) { + static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 }; // Instead of finding a proper point to flush, we just emit a full rectangle every time one // is finished. - FlushImm(); - // Need to reset immCount_ here. If we do it in FlushImm it could get skipped by gstate_c.skipDrawReason. - immCount_ = 0; + if (immCount_ == flushPrimCount[immPrim_ & 7]) + FlushImm(); } else { ERROR_LOG_REPORT_ONCE(imm_draw_prim, G3D, "Immediate draw: Unexpected primitive %d at count %d", prim, immCount_); } } void GPUCommon::FlushImm() { + if (immCount_ == 0 || immPrim_ == GE_PRIM_INVALID) + return; + SetDrawType(DRAW_PRIM, immPrim_); if (framebufferManager_) framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // No idea how many cycles to skip, heh. + immCount_ = 0; return; } UpdateUVScaleOffset(); @@ -2423,23 +2440,28 @@ void GPUCommon::FlushImm() { // through vertices. // Since the only known use is Thrillville and it only uses it to clear, we just use color and pos. struct ImmVertex { + float uv[2]; uint32_t color; float xyz[3]; }; ImmVertex temp[MAX_IMMBUFFER_SIZE]; for (int i = 0; i < immCount_; i++) { + // Since we're sending through, scale back up to w/h. + temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0); + temp[i].uv[1] = immBuffer_[i].v * gstate.getTextureHeight(0); temp[i].color = immBuffer_[i].color0_32; temp[i].xyz[0] = immBuffer_[i].pos[0]; temp[i].xyz[1] = immBuffer_[i].pos[1]; temp[i].xyz[2] = immBuffer_[i].pos[2]; } - int vtype = GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; + int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; int bytesRead; uint32_t vertTypeID = GetVertTypeID(vtype, 0); drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, gstate.getCullMode(), &bytesRead); // TOOD: In the future, make a special path for these. // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_); + immCount_ = 0; } void GPUCommon::ExecuteOp(u32 op, u32 diff) { diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index f389b788e2be..f2c64d95b872 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -286,6 +286,7 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface { void UpdatePC(u32 currentPC, u32 newPC); void UpdateState(GPURunState state); void FastLoadBoneMatrix(u32 target); + void FlushImm(); // TODO: Unify this. virtual void FinishDeferred() {} @@ -352,13 +353,12 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface { TransformedVertex immBuffer_[MAX_IMMBUFFER_SIZE]; int immCount_ = 0; - GEPrimitiveType immPrim_; + GEPrimitiveType immPrim_ = GE_PRIM_INVALID; std::string reportingPrimaryInfo_; std::string reportingFullInfo_; private: - void FlushImm(); void CheckDepthUsage(VirtualFramebuffer *vfb); void DoBlockTransfer(u32 skipDrawReason); void DoExecuteCall(u32 target); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 7b93e52c6f67..a6fc160af38e 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -831,6 +831,7 @@ void SoftGPU::Execute_Prim(u32 op, u32 diff) { GEPrimitiveType prim = static_cast((op >> 16) & 7); if (count == 0) return; + FlushImm(); if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { ERROR_LOG_REPORT(G3D, "Software: Bad vertex address %08x!", gstate_c.vertexAddr); From f27426714306e07f33b9ff588e2e63062fdae222 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 5 Sep 2022 23:13:16 -0700 Subject: [PATCH 5/8] GPU: Allow usage of texturing in immediate verts. And respect the other flags that I can reproduce working in a test. I can't seem to get the fog to work at all, or the shading mode, or the secondary color. Maybe depends on other flags or bits in other regs... --- GPU/GPUCommon.cpp | 34 +++++++++++++++++++++++++++++++++- GPU/GPUCommon.h | 1 + GPU/Software/TransformUnit.cpp | 4 +++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a4e765ea036c..a47e9edea8a5 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -2410,6 +2410,8 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { v.color1_32 = gstate.imm_scv & 0xFFFFFF; if (prim != GE_PRIM_KEEP_PREVIOUS) { immPrim_ = (GEPrimitiveType)prim; + // Flags seem to only be respected from the first prim. + immFlags_ = op & 0x00FFF800; } else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) { static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 }; // Instead of finding a proper point to flush, we just emit a full rectangle every time one @@ -2456,12 +2458,42 @@ void GPUCommon::FlushImm() { } int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; + static constexpr int GE_IMM_CULLENABLE = 0x00080000; + static constexpr int GE_IMM_CULLFACE = 0x00100000; + static constexpr int GE_IMM_TEXTURE = 0x00200000; + static constexpr int GE_IMM_DITHER = 0x00800000; + + bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0; + bool prevTexturing = gstate.isTextureMapEnabled(); + bool cullEnable = (immFlags_ & GE_IMM_CULLENABLE) != 0; + bool prevCullEnable = gstate.isCullEnabled(); + int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0; + bool prevDither = gstate.isDitherEnabled(); + bool dither = (immFlags_ & GE_IMM_DITHER) != 0; + // Some notes say there's a flag to control this, but it seems to be flat regardless. + GEShadeMode prevShadeMode = gstate.getShadeMode(); + + if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShadeMode != GE_SHADE_FLAT) { + DispatchFlush(); + gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing; + gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable; + gstate.shademodel = (GE_CMD_SHADEMODE << 24) | GE_SHADE_FLAT; + gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); + } + int bytesRead; uint32_t vertTypeID = GetVertTypeID(vtype, 0); - drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, gstate.getCullMode(), &bytesRead); + drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead); // TOOD: In the future, make a special path for these. // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_); immCount_ = 0; + + gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing; + gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable; + gstate.shademodel = (GE_CMD_SHADEMODE << 24) | prevShadeMode; + gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); } void GPUCommon::ExecuteOp(u32 op, u32 diff) { diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index f2c64d95b872..3a5674127f28 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -354,6 +354,7 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface { TransformedVertex immBuffer_[MAX_IMMBUFFER_SIZE]; int immCount_ = 0; GEPrimitiveType immPrim_ = GE_PRIM_INVALID; + uint32_t immFlags_ = 0; std::string reportingPrimaryInfo_; std::string reportingFullInfo_; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index db50f48bb672..cbfb754b7763 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -71,10 +71,12 @@ void SoftwareDrawEngine::DispatchSubmitPrim(const void *verts, const void *inds, } void SoftwareDrawEngine::DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { - _assert_msg_(cullMode == gstate.getCullMode(), "Mixed cull mode not supported."); + int flipCull = cullMode != gstate.getCullMode() ? 1 : 0; // TODO: For now, just setting all dirty. transformUnit.SetDirty(SoftDirty(-1)); + gstate.cullmode ^= flipCull; transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this); + gstate.cullmode ^= flipCull; // TODO: Should really clear, but the vertex type is faked so things might need resetting... transformUnit.SetDirty(SoftDirty(-1)); } From 7a83f8bab5b40a1f0c67b37e431179527619ba30 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 6 Sep 2022 19:45:03 -0700 Subject: [PATCH 6/8] softgpu: Use vertType prim override for flags. These parameters are a real shame, was so clean before... --- GPU/Common/VertexDecoderCommon.h | 3 +++ GPU/Software/BinManager.cpp | 4 ++-- GPU/Software/BinManager.h | 2 +- GPU/Software/Clipper.cpp | 6 +++--- GPU/Software/FuncId.cpp | 6 +++--- GPU/Software/FuncId.h | 2 +- GPU/Software/Rasterizer.cpp | 6 +++--- GPU/Software/Rasterizer.h | 2 +- GPU/Software/TransformUnit.cpp | 10 +++++----- 9 files changed, 22 insertions(+), 19 deletions(-) diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 3d6377a86c41..5ec55ebe4cc4 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -28,6 +28,7 @@ #include "GPU/ge_constants.h" #include "GPU/Common/ShaderCommon.h" #include "GPU/GPUCommon.h" +#include "GPU/GPUState.h" #if PPSSPP_ARCH(ARM) #include "Common/ArmEmitter.h" @@ -299,6 +300,8 @@ class VertexReader { bool hasNormal() const { return decFmt_.nrmfmt != 0; } bool hasUV() const { return decFmt_.uvfmt != 0; } bool isThrough() const { return (vtype_ & GE_VTYPE_THROUGH) != 0; } + bool skinningEnabled() const { return vertTypeIsSkinningEnabled(vtype_); } + int numBoneWeights() const { return vertTypeGetNumBoneWeights(vtype_); } void Goto(int index) { data_ = base_ + index * decFmt_.stride; } diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index 7f8f1ad453ea..faaf7b03a8f4 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -161,13 +161,13 @@ BinManager::~BinManager() { } } -void BinManager::UpdateState() { +void BinManager::UpdateState(bool throughMode) { PROFILE_THIS_SCOPE("bin_state"); if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) { if (states_.Full()) Flush("states"); stateIndex_ = (int)states_.Push(RasterizerState()); - ComputeRasterizerState(&states_[stateIndex_]); + ComputeRasterizerState(&states_[stateIndex_], throughMode); states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable; ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL); diff --git a/GPU/Software/BinManager.h b/GPU/Software/BinManager.h index 01b6837d2087..1d628aa8e467 100644 --- a/GPU/Software/BinManager.h +++ b/GPU/Software/BinManager.h @@ -181,7 +181,7 @@ class BinManager { BinManager(); ~BinManager(); - void UpdateState(); + void UpdateState(bool throughMode); void UpdateClut(const void *src); const Rasterizer::RasterizerState &State() { diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 3627db0e3d18..166947ef146f 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -132,7 +132,7 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) { } void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) { - if (!gstate.isModeThrough()) { + if (!binner.State().throughMode) { // We may discard the entire rect based on depth values. int outsidePos = 0, outsideNeg = 0; CheckOutsideZ(v0.clippos, outsidePos, outsideNeg); @@ -181,7 +181,7 @@ void ProcessPoint(const VertexData &v0, BinManager &binner) { } void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner) { - if (gstate.isModeThrough()) { + if (binner.State().throughMode) { // Actually, should clip this one too so we don't need to do bounds checks in the rasterizer. binner.AddLine(v0, v1); return; @@ -221,7 +221,7 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner) void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) { int mask = 0; - if (!gstate.isModeThrough()) { + if (!binner.State().throughMode) { mask |= CalcClipMask(v0.clippos); mask |= CalcClipMask(v1.clippos); mask |= CalcClipMask(v2.clippos); diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 729591212b1e..084c8978296d 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -48,11 +48,11 @@ static inline PixelBlendFactor OptimizeAlphaFactor(uint32_t color) { return PixelBlendFactor::FIX; } -void ComputePixelFuncID(PixelFuncID *id) { +void ComputePixelFuncID(PixelFuncID *id, bool throughMode) { id->fullKey = 0; // TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts... - id->applyDepthRange = !gstate.isModeThrough(); + id->applyDepthRange = !throughMode; // Dither happens even in clear mode. id->dithering = gstate.isDitherEnabled(); id->fbFormat = gstate.FrameBufFormat(); @@ -162,7 +162,7 @@ void ComputePixelFuncID(PixelFuncID *id) { } id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY; - id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough(); + id->applyFog = gstate.isFogEnabled() && !throughMode; } // Cache some values for later convenience. diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index f847522e25e4..72207ebd5abc 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -243,7 +243,7 @@ struct hash { }; -void ComputePixelFuncID(PixelFuncID *id); +void ComputePixelFuncID(PixelFuncID *id, bool throughMode); std::string DescribePixelFuncID(const PixelFuncID &id); void ComputeSamplerID(SamplerID *id); diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 48e51cd9b26b..a387e3b943f4 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -101,8 +101,8 @@ static inline Vec4 Interpolate(const float &c0, const float &c1, const fl return Interpolate(c0, c1, c2, w0.Cast(), w1.Cast(), w2.Cast(), wsum_recip); } -void ComputeRasterizerState(RasterizerState *state) { - ComputePixelFuncID(&state->pixelID); +void ComputeRasterizerState(RasterizerState *state, bool throughMode) { + ComputePixelFuncID(&state->pixelID, throughMode); state->drawPixel = Rasterizer::GetSingleFunc(state->pixelID); state->enableTextures = gstate.isTextureMapEnabled() && !state->pixelID.clearMode; @@ -140,7 +140,7 @@ void ComputeRasterizerState(RasterizerState *state) { } state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD; - state->throughMode = gstate.isModeThrough(); + state->throughMode = throughMode; state->antialiasLines = gstate.isAntiAliasEnabled(); state->screenOffsetX = gstate.getOffsetX16(); diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 2725bb922e82..36236f550010 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -67,7 +67,7 @@ struct RasterizerState { } }; -void ComputeRasterizerState(RasterizerState *state); +void ComputeRasterizerState(RasterizerState *state, bool throughMode); // Draws a triangle if its vertices are specified in counter-clockwise order void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index cbfb754b7763..2a8eeeffa6ab 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -202,11 +202,11 @@ struct TransformState { }; void ComputeTransformState(TransformState *state, const VertexReader &vreader) { - state->enableTransform = !gstate.isModeThrough(); + state->enableTransform = !vreader.isThrough(); state->enableLighting = gstate.isLightingEnabled(); state->enableFog = gstate.isFogEnabled(); state->readUV = !gstate.isModeClear() && gstate.isTextureMapEnabled() && vreader.hasUV(); - state->readWeights = vertTypeIsSkinningEnabled(gstate.vertType) && state->enableTransform; + state->readWeights = vreader.skinningEnabled() && state->enableTransform; state->negateNormals = gstate.areNormalsReversed(); state->uvGenMode = gstate.getUVGenMode(); @@ -302,7 +302,7 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState Vec3 tmppos(0.f, 0.f, 0.f); Vec3 tmpnrm(0.f, 0.f, 0.f); - for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) { + for (int i = 0; i < vreader.numBoneWeights(); ++i) { Vec3 step = Vec3ByMatrix43(pos, gstate.boneMatrix + i * 12); tmppos += step * W[i]; if (vreader.hasNormal()) { @@ -506,7 +506,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G // TODO: Do this in two passes - first process the vertices (before indexing/stripping), // then resolve the indices. This lets us avoid transforming shared vertices twice. - binner_->UpdateState(); + binner_->UpdateState(vreader.isThrough()); static TransformState transformState; if (binner_->HasDirty(SoftDirty::LIGHT_ALL | SoftDirty::TRANSFORM_ALL)) { @@ -595,7 +595,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G } } - if (data_index == 4 && gstate.isModeThrough() && cullType == CullType::OFF) { + if (data_index == 4 && vreader.isThrough() && cullType == CullType::OFF) { if (Rasterizer::DetectRectangleThroughModeSlices(binner_->State(), data)) { data[1] = data[3]; data_index = 2; From 880f6f8d4953a0312c6cd460405bc533092461ce Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 6 Sep 2022 22:03:46 -0700 Subject: [PATCH 7/8] GPU: Handle more flags on imm prim command. --- GPU/GPUCommon.cpp | 33 ++++++++++++++++++--------------- GPU/ge_constants.h | 8 ++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a47e9edea8a5..0d4d8cf2fde2 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -2406,7 +2406,9 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { v.v = getFloat24(gstate.imm_vtct); v.uv_w = getFloat24(gstate.imm_vtcq); v.color0_32 = (gstate.imm_cv & 0xFFFFFF) | (gstate.imm_ap << 24); - v.fog = 0.0f; // we have no information about the scale here + // TODO: When !gstate.isModeThrough(), direct fog coefficient (0 = entirely fog), ignore fog flag (also GE_IMM_FOG.) + v.fog = (gstate.imm_fc & 0xFF) / 255.0f; + // TODO: Apply if gstate.isUsingSecondaryColor() && !gstate.isModeThrough(), ignore lighting flag. v.color1_32 = gstate.imm_scv & 0xFFFFFF; if (prim != GE_PRIM_KEEP_PREVIOUS) { immPrim_ = (GEPrimitiveType)prim; @@ -2458,26 +2460,26 @@ void GPUCommon::FlushImm() { } int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; - static constexpr int GE_IMM_CULLENABLE = 0x00080000; - static constexpr int GE_IMM_CULLFACE = 0x00100000; - static constexpr int GE_IMM_TEXTURE = 0x00200000; - static constexpr int GE_IMM_DITHER = 0x00800000; + // TODO: Handle fog and secondary color somehow? - bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0; - bool prevTexturing = gstate.isTextureMapEnabled(); + bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0; + bool prevAntialias = gstate.isAntiAliasEnabled(); + bool shading = (immFlags_ & GE_IMM_SHADING) != 0; + bool prevShading = gstate.getShadeMode() == GE_SHADE_GOURAUD; bool cullEnable = (immFlags_ & GE_IMM_CULLENABLE) != 0; bool prevCullEnable = gstate.isCullEnabled(); int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0; - bool prevDither = gstate.isDitherEnabled(); + bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0; + bool prevTexturing = gstate.isTextureMapEnabled(); bool dither = (immFlags_ & GE_IMM_DITHER) != 0; - // Some notes say there's a flag to control this, but it seems to be flat regardless. - GEShadeMode prevShadeMode = gstate.getShadeMode(); + bool prevDither = gstate.isDitherEnabled(); - if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShadeMode != GE_SHADE_FLAT) { + if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) { DispatchFlush(); - gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing; + gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias; + gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading; gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable; - gstate.shademodel = (GE_CMD_SHADEMODE << 24) | GE_SHADE_FLAT; + gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing; gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither; gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); } @@ -2489,9 +2491,10 @@ void GPUCommon::FlushImm() { // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_); immCount_ = 0; - gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing; + gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias; + gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading; gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable; - gstate.shademodel = (GE_CMD_SHADEMODE << 24) | prevShadeMode; + gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing; gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither; gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); } diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 3559282c5782..3c47a5781e2b 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -350,6 +350,14 @@ inline bool IsGeBufferFormat16BitColor(GEBufferFormat fmt) { #define GE_CLEARMODE_Z (1<<10) #define GE_CLEARMODE_ALL (GE_CLEARMODE_COLOR|GE_CLEARMODE_ALPHA|GE_CLEARMODE_Z) +#define GE_IMM_ANTIALIAS 0x00000800 +#define GE_IMM_SHADING 0x00040000 +#define GE_IMM_CULLENABLE 0x00080000 +#define GE_IMM_CULLFACE 0x00100000 +#define GE_IMM_TEXTURE 0x00200000 +#define GE_IMM_FOG 0x00400000 +#define GE_IMM_DITHER 0x00800000 + enum GEMatrixType { GE_MTX_BONE0 = 0, GE_MTX_BONE1, From 402492a95842cb7cb4a7872098c64d64d6f1b234 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 6 Sep 2022 22:18:55 -0700 Subject: [PATCH 8/8] GE Debugger: Show imm prim flag detail in disasm. --- GPU/Debugger/GECommandTable.cpp | 2 -- GPU/Debugger/GECommandTable.h | 2 +- GPU/GPUCommon.cpp | 12 +++++++++++- GPU/GeDisasm.cpp | 29 +++++++++++++++++++++++++++-- GPU/ge_constants.h | 1 + 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/GPU/Debugger/GECommandTable.cpp b/GPU/Debugger/GECommandTable.cpp index 95e77f1bb6c3..16304b9d7500 100644 --- a/GPU/Debugger/GECommandTable.cpp +++ b/GPU/Debugger/GECommandTable.cpp @@ -275,9 +275,7 @@ static constexpr GECmdInfo geCmdInfo[] = { { GE_CMD_VTCT, "immt", GECmdFormat::FLOAT }, { GE_CMD_VTCQ, "immq", GECmdFormat::FLOAT }, { GE_CMD_VCV, "immrgb", GECmdFormat::RGB }, - // TODO: Confirm if any other bits are used? { GE_CMD_VAP, "imma_prim", GECmdFormat::ALPHA_PRIM }, - // TODO: Confirm it's 8 bit? { GE_CMD_VFC, "immfog", GECmdFormat::DATA8 }, { GE_CMD_VSCV, "immrgb1", GECmdFormat::RGB }, { GE_CMD_UNKNOWN_FA, "unknownfa", GECmdFormat::NONE }, diff --git a/GPU/Debugger/GECommandTable.h b/GPU/Debugger/GECommandTable.h index 9abe0f7c6d99..95b4eddf0f3b 100644 --- a/GPU/Debugger/GECommandTable.h +++ b/GPU/Debugger/GECommandTable.h @@ -66,7 +66,7 @@ enum class GECmdFormat { BLEND_MODE, // 4 bits srcfactor, 4 bits dstfactor, 3 bits equation. DITHER_ROW, // 4 s.3.0 fixed point dither offsets. LOGIC_OP, // 4 bits logic operation. - ALPHA_PRIM, // 8 bits alpha, 3 bits primitive type. + ALPHA_PRIM, // 8 bits alpha, 3 bits primitive type, 1 bit antialias, 6 bit clip?, 1 bit shading, 1 bit cullenable, 1 bit cullface, 1 bit tex enable, 1 bit fog, 1 bit dither. }; struct GECmdInfo { diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 0d4d8cf2fde2..107398b7c834 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -2449,6 +2449,7 @@ void GPUCommon::FlushImm() { float xyz[3]; }; ImmVertex temp[MAX_IMMBUFFER_SIZE]; + uint32_t color1Used = 0; for (int i = 0; i < immCount_; i++) { // Since we're sending through, scale back up to w/h. temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0); @@ -2457,6 +2458,7 @@ void GPUCommon::FlushImm() { temp[i].xyz[0] = immBuffer_[i].pos[0]; temp[i].xyz[1] = immBuffer_[i].pos[1]; temp[i].xyz[2] = immBuffer_[i].pos[2]; + color1Used |= immBuffer_[i].color1_32; } int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; @@ -2474,6 +2476,14 @@ void GPUCommon::FlushImm() { bool dither = (immFlags_ & GE_IMM_DITHER) != 0; bool prevDither = gstate.isDitherEnabled(); + if ((immFlags_ & GE_IMM_CLIPMASK) != 0) { + WARN_LOG_REPORT_ONCE(geimmclipvalue, G3D, "Imm vertex used clip value, flags=%06x", immFlags_); + } else if ((immFlags_ & GE_IMM_FOG) != 0) { + WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog, flags=%06x", immFlags_); + } else if (color1Used != 0 && gstate.isUsingSecondaryColor()) { + WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color, flags=%06x", immFlags_); + } + if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) { DispatchFlush(); gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias; @@ -2487,7 +2497,7 @@ void GPUCommon::FlushImm() { int bytesRead; uint32_t vertTypeID = GetVertTypeID(vtype, 0); drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead); - // TOOD: In the future, make a special path for these. + // TODO: In the future, make a special path for these. // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_); immCount_ = 0; diff --git a/GPU/GeDisasm.cpp b/GPU/GeDisasm.cpp index c3098f5706ab..f1f12932558e 100644 --- a/GPU/GeDisasm.cpp +++ b/GPU/GeDisasm.cpp @@ -1354,11 +1354,36 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer, int bufsize) { break; case GE_CMD_VAP: - snprintf(buffer, bufsize, "Vertex draw: alpha=%02x, prim=%s, other=%06x", data & 0xFF, primTypes[(data >> 8) & 7], data & ~0x0007FF); + { + bool antialias = (data & GE_IMM_ANTIALIAS) != 0; + int clip = (data & GE_IMM_CLIPMASK) >> 12; + bool shading = (data & GE_IMM_SHADING) != 0; + bool cullEnable = (data & GE_IMM_CULLENABLE) != 0; + int cullMode = (data & GE_IMM_CULLFACE) != 0 ? 1 : 0; + bool texturing = (data & GE_IMM_TEXTURE) != 0; + bool dither = (data & GE_IMM_DITHER) != 0; + char *p = buffer; + p += snprintf(p, bufsize - (p - buffer), "Vertex draw: alpha=%02x, prim=%s", data & 0xFF, primTypes[(data >> 8) & 7]); + if (antialias) + p += snprintf(p, bufsize - (p - buffer), ", antialias"); + if (clip != 0) + p += snprintf(p, bufsize - (p - buffer), ", clip=%02x", clip); + if (shading) + p += snprintf(p, bufsize - (p - buffer), ", shading"); + if (cullEnable) + p += snprintf(p, bufsize - (p - buffer), ", cull=%s", cullMode == 1 ? "back (CCW)" : "front (CW)"); + if (texturing) + p += snprintf(p, bufsize - (p - buffer), ", texturing"); + if (dither) + p += snprintf(p, bufsize - (p - buffer), ", dither"); + } break; case GE_CMD_VFC: - snprintf(buffer, bufsize, "Vertex fog: %06x", data); + if (data & ~0xFF) + snprintf(buffer, bufsize, "Vertex fog: %02x / %f (extra %04x)", data & 0xFF, (data & 0xFF) / 255.0f, data >> 8); + else + snprintf(buffer, bufsize, "Vertex fog: %02x / %f", data & 0xFF, (data & 0xFF) / 255.0f); break; case GE_CMD_VSCV: diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 3c47a5781e2b..19a51ae5ab1d 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -351,6 +351,7 @@ inline bool IsGeBufferFormat16BitColor(GEBufferFormat fmt) { #define GE_CLEARMODE_ALL (GE_CLEARMODE_COLOR|GE_CLEARMODE_ALPHA|GE_CLEARMODE_Z) #define GE_IMM_ANTIALIAS 0x00000800 +#define GE_IMM_CLIPMASK 0x0003F000 #define GE_IMM_SHADING 0x00040000 #define GE_IMM_CULLENABLE 0x00080000 #define GE_IMM_CULLFACE 0x00100000