Skip to content

Commit

Permalink
Merge pull request #16046 from unknownbrackets/tests-update
Browse files (browse the repository at this point in the history)
headless: Add some new and passing tests
  • Loading branch information
unknownbrackets authored Sep 18, 2022
2 parents 91d9c69 + ca248e1 commit f2beafe
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 47 deletions.
14 changes: 3 additions & 11 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() {
STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3);
}

// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
_dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order.");
_dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order.");

// TODO: SIMD
LDRSB(tempReg1, srcReg, dec_->posoff);
LDRSB(tempReg2, srcReg, dec_->posoff + 1);
LDRB(tempReg3, srcReg, dec_->posoff + 2);
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 };
// 8-bit positions in throughmode always decode to 0, depth included.
VEOR(neonScratchReg, neonScratchReg, neonScratchReg);
VEOR(neonScratchReg2, neonScratchReg, neonScratchReg);
ADD(scratchReg, dstReg, dec_->decFmt.posoff);
VMOV(neonScratchReg, tempReg1, tempReg2);
VMOV(neonScratchReg2, tempReg3, tempReg3);
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE);
}

Expand Down
12 changes: 4 additions & 8 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() {
}

void VertexDecoderJitCache::Jit_PosS8Through() {
LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1);
LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2);
fp.SCVTF(fpScratchReg, tempReg1);
fp.SCVTF(fpScratchReg2, tempReg2);
fp.SCVTF(fpScratchReg3, tempReg3);
// 8-bit positions in throughmode always decode to 0, depth included.
fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8);
}

void VertexDecoderJitCache::Jit_PosS16Through() {
Expand Down
33 changes: 19 additions & 14 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const
Vec3ByMatrix43(pos, fn, skinMatrix);
}

void VertexDecoder::Step_PosS8Through() const
{
void VertexDecoder::Step_PosInvalid() const {
// Invalid positions are just culled. Simulate by forcing invalid values.
float *v = (float *)(decoded_ + decFmt.posoff);
const s8 *sv = (const s8 *)(ptr_ + posoff);
const u8 *uv = (const u8 *)(ptr_ + posoff);
v[0] = sv[0];
v[1] = sv[1];
v[2] = uv[2];
v[0] = std::numeric_limits<float>::infinity();
v[1] = std::numeric_limits<float>::infinity();
v[2] = std::numeric_limits<float>::infinity();
}

void VertexDecoder::Step_PosS8Through() const {
// 8-bit positions in throughmode always decode to 0, depth included.
float *v = (float *)(decoded_ + decFmt.posoff);
v[0] = 0;
v[1] = 0;
v[2] = 0;
}

void VertexDecoder::Step_PosS16Through() const
Expand Down Expand Up @@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = {
};

static const StepFunction posstep[4] = {
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosS16,
&VertexDecoder::Step_PosFloat,
};

static const StepFunction posstep_skin[4] = {
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosS16Skin,
&VertexDecoder::Step_PosFloatSkin,
};

static const StepFunction posstep_morph[4] = {
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosS16Morph,
&VertexDecoder::Step_PosFloatMorph,
};

static const StepFunction posstep_morph_skin[4] = {
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosS16MorphSkin,
&VertexDecoder::Step_PosFloatMorphSkin,
};

static const StepFunction posstep_through[4] = {
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosS16Through,
&VertexDecoder::Step_PosFloatThrough,
Expand Down Expand Up @@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
bool reportNoPos = false;
if (!pos) {
reportNoPos = true;
pos = 1;
}
if (pos) { // there's always a position
if (pos >= 0) { // there's always a position
size = align(size, posalign[pos]);
posoff = size;
size += possize[pos];
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ class VertexDecoder {
void Step_PosS16MorphSkin() const;
void Step_PosFloatMorphSkin() const;

void Step_PosInvalid() const;
void Step_PosS8Through() const;
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;
Expand Down
7 changes: 1 addition & 6 deletions GPU/Common/VertexDecoderX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() {

// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
// SIMD doesn't really matter since this isn't useful on hardware.
XORPS(fpScratchReg, R(fpScratchReg));
for (int i = 0; i < 3; i++) {
if (i == 2)
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
else
MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
CVTSI2SS(fpScratchReg, R(tempReg1));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg);
}
}
Expand Down
4 changes: 1 addition & 3 deletions GPU/Software/TransformUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,10 +503,8 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) {
return;
}
// Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen?
if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT)
return;
// Vertices without position are just entirely culled.
// Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode.
if ((vertex_type & GE_VTYPE_POS_MASK) == 0)
return;

Expand Down
2 changes: 1 addition & 1 deletion headless/Headless.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ int printUsage(const char *progname, const char *reason)
fprintf(stderr, " -m, --mount umd.cso mount iso on umd1:\n");
fprintf(stderr, " -r, --root some/path mount path on host0: (elfs must be in here)\n");
fprintf(stderr, " -l, --log full log output, not just emulated printfs\n");
fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n");
fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n");

fprintf(stderr, " --graphics=BACKEND use a different gpu backend\n");
fprintf(stderr, " options: gles, software, directx9, etc.\n");
Expand Down
12 changes: 8 additions & 4 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def target():
"gpu/commands/blend",
"gpu/commands/blend565",
"gpu/commands/blocktransfer",
"gpu/commands/fog",
"gpu/commands/material",
"gpu/displaylist/alignment",
"gpu/dither/dither",
Expand All @@ -157,7 +158,11 @@ def target():
"gpu/ge/enqueueparam",
"gpu/ge/queue",
"gpu/primitives/indices",
"gpu/primitives/invalidprim",
"gpu/primitives/trianglefan",
"gpu/primitives/trianglestrip",
"gpu/primitives/triangles",
"gpu/rendertarget/copy",
"gpu/rendertarget/depal",
"gpu/signals/pause",
"gpu/signals/pause2",
Expand Down Expand Up @@ -269,6 +274,7 @@ def target():
"threads/mutex/refer",
"threads/mutex/try",
"threads/mutex/unlock",
"threads/mutex/unlock2",
"threads/semaphores/semaphores",
"threads/semaphores/cancel",
"threads/semaphores/create",
Expand Down Expand Up @@ -394,21 +400,19 @@ def target():
"gpu/ge/get",
"gpu/primitives/bezier",
"gpu/primitives/continue",
"gpu/primitives/invalidprim",
"gpu/primitives/immediate",
"gpu/primitives/lines",
"gpu/primitives/linestrip",
"gpu/primitives/points",
"gpu/primitives/rectangles",
"gpu/primitives/spline",
"gpu/primitives/trianglefan",
"gpu/primitives/trianglestrip",
"gpu/reflection/reflection",
"gpu/rendertarget/copy",
"gpu/rendertarget/rendertarget",
"gpu/signals/continue",
"gpu/signals/jumps",
"gpu/signals/simple",
"gpu/simple/simple",
"gpu/textures/size",
"gpu/triangle/triangle",
"gpu/vertices/colors",
"gpu/vertices/texcoords",
Expand Down

0 comments on commit f2beafe

Please sign in to comment.