diff --git a/ares/ares/node/debugger/tracer/instruction.hpp b/ares/ares/node/debugger/tracer/instruction.hpp
index 31a778cc51..1d7dd231ac 100644
--- a/ares/ares/node/debugger/tracer/instruction.hpp
+++ b/ares/ares/node/debugger/tracer/instruction.hpp
@@ -28,6 +28,16 @@ struct Instruction : Tracer {
     for(auto& history : _history) history = ~0ull;
   }
 
+  //disabling clears the omitted counter and re-applies the current mask and depth.
+  //NOTE(review): nothing is done when enabled is true - confirm that is intended.
+  auto setEnabled(bool enabled) -> void {
+    if(!enabled) {
+      _omitted = 0;
+      setMask(_mask);
+      setDepth(_depth);
+    }
+  }
+
   auto address(u64 address) -> bool {
     address &= ~0ull >> (64 - _addressBits);  //mask upper bits of address
     _address = address;
diff --git a/ares/n64/accuracy.hpp b/ares/n64/accuracy.hpp
index 007ecd4641..4550eada4c 100644
--- a/ares/n64/accuracy.hpp
+++ b/ares/n64/accuracy.hpp
@@ -11,7 +11,8 @@ struct Accuracy {
   };
 
   struct RSP {
-    static constexpr bool Interpreter = 0 | Reference | !recompiler::generic::supported;
+    //NOTE(review): the interpreter is forced on, which makes Recompiler always false - confirm before merging
+    static constexpr bool Interpreter = 1;//0 | Reference | !recompiler::generic::supported;
     static constexpr bool Recompiler = !Interpreter;
 
     //VU instructions
diff --git a/ares/n64/rsp/debugger.cpp b/ares/n64/rsp/debugger.cpp
index 6bb0afd11a..ad3b8f1ae0 100644
--- a/ares/n64/rsp/debugger.cpp
+++ b/ares/n64/rsp/debugger.cpp
@@ -52,12 +52,32 @@ auto RSP::Debugger::instruction() -> void {
   if(unlikely(tracer.instruction->enabled())) {
     u32 address = rsp.pipeline.address & 0xfff;
     u32 instruction = rsp.pipeline.instruction;
+    u32 cycle = rsp.pipeline.clocksTotal / 3;
+
+    bool hasDblIssues = rsp.pipeline.dblIssueCount > 0 && cycle != 0;
+    string cycleStr = hasDblIssues
+      ? string{" ^"}
+      : pad(cycle, 4, ' ');
+
     if(tracer.instruction->address(address)) {
       rsp.disassembler.showColors = 0;
-      tracer.instruction->notify(rsp.disassembler.disassemble(address, instruction), {});
+      string res{"[", cycleStr, "] ",
+        "\033[A", // cursor up
+        pad(pad("", rsp.pipeline.stallCount, '*'), 3, ' '),
+        "\033[B", // cursor down
+        " | ",
+        rsp.disassembler.disassemble(address, instruction)
+      };
+      tracer.instruction->notify(res, {});
       rsp.disassembler.showColors = 1;
     }
   }
+
+  //a non-zero countdown is decremented on every call; tracing is disabled when it reaches zero
+  if(tracer.instructionCountdown) {
+    if (--tracer.instructionCountdown == 0)
+      tracer.instruction->setEnabled(false);
+  }
 }
 
 auto RSP::Debugger::ioSCC(bool mode, u32 address, u32 data) -> void {
diff --git a/ares/n64/rsp/emux.cpp b/ares/n64/rsp/emux.cpp
new file mode 100644
index 0000000000..f654eb9278
--- /dev/null
+++ b/ares/n64/rsp/emux.cpp
@@ -0,0 +1,280 @@
+//TNE doubles as an emulator hook: when both operands name the same register,
+//code selects an EMUX service and rt carries its argument.
+//any other encoding is treated as INVALID.
+auto RSP::TNE(cr32& rs, cr32& rt, u32 code) -> void {
+  if(&rs != &rt) return INVALID();
+  EMUX(rt, code);
+}
+
+namespace {
+  //parser states for the log(formatted string) service (code 0x34)
+  enum class FormatStage {
+    SEARCH_MARKER,   //copying literal text until a '%' is found
+    IN_FORMAT,       //the next character is the format type
+    IN_PLACEHOLDER,  //collecting the register name
+    IN_LANE,         //collecting the lane suffix that follows '.'
+  };
+
+  //parses the run of decimal digits that starts at text[offset].
+  //returns -1 when there is no digit at that position; unlike std::stoi this
+  //never throws, so a malformed guest string cannot terminate the emulator.
+  auto emuxParseNumber(const std::string& text, size_t offset) -> int {
+    auto isDigit = [&](size_t at) { return at < text.length() && text[at] >= '0' && text[at] <= '9'; };
+    if(!isDigit(offset)) return -1;
+    int value = 0;
+    //the cap is far beyond any valid register or lane index and prevents overflow
+    for(; isDigit(offset) && value <= 0xffff; ++offset) value = value * 10 + (text[offset] - '0');
+    return value;
+  }
+}
+
+//executes the EMUX debug service selected by code; rt holds the service argument.
+//  0x20-0x22: start tracing, trace a number of instructions, stop tracing
+//  0x30-0x34: log a byte, a string, set buffer length, log a buffer, log a formatted string
+//  0x40-0x4c: dump GPR, COP0 or COP2 registers (rt is a register mask, 0 selects all)
+//all output goes to stdout.
+auto RSP::EMUX(cr32& rt, u32 code) -> void {
+  static const char *mips_reg_names[32] = {
+    "zr", "at", "v0", "v1", "a0", "a1", "a2", "a3",
+    "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra",
+  };
+
+  switch (code) {
+  case 0x20:  // trace(start)
+    printf("[emux] trace start\n");
+    debugger.tracer.instruction->setEnabled(true);
+    debugger.tracer.instruction->setTerminal(true);
+    debugger.tracer.instructionCountdown = 0;
+    rsp.pipeline.clocksTotal = 0;
+    break;
+  case 0x21:  // trace(count)
+    printf("[emux] trace(count): %08x\n", rt.u32);
+    debugger.tracer.instruction->setEnabled(true);
+    debugger.tracer.instructionCountdown = rt.u32;
+    break;
+  case 0x22:  // trace(stop)
+    debugger.tracer.instruction->setTerminal(false);
+    debugger.tracer.instruction->setEnabled(false);
+    printf("[emux] trace stop\n");
+    break;
+  case 0x30:  // log(byte)
+    fputc(rt.u32 & 0xFF, stdout);
+    break;
+  case 0x31: {  // log(string)
+    //prints bytes until a NUL terminator, capped at 4096 characters
+    u32 i = 0;
+    do {
+      char c = dmem.read(rt.u32 + i);
+      if (c == '\0') break;
+      fputc(c, stdout);
+    } while (++i < 4096);
+  } break;
+  case 0x32:  // log(buflen)
+    emux.buflen = rt.u32;
+    break;
+  case 0x33:  // log(buf)
+    for (auto i : range(emux.buflen)) {
+      fputc(dmem.read(rt.u32 + i), stdout);
+    }
+    break;
+  case 0x34: {  // log(formatted string)
+    //placeholders are '%', a type character, a register name and optionally
+    //".<lane>"; the lane number starts at the second character of that suffix.
+    std::string fmtReg;
+    std::string fmtLane;
+    char fmtType = 'x';
+    FormatStage stage = FormatStage::SEARCH_MARKER;
+
+    auto print_lane = [&](r128& vr, r128& vrNext, int lane, char fmt) {
+      switch(fmt) {
+      default:
+      case 'x': fprintf(stdout, "%04X", vr.u16(lane)); return;
+      case 'u': fprintf(stdout, "%u", vr.u16(lane)); return;
+      case 'd': fprintf(stdout, "%d", vr.s16(lane)); return;
+      case 'f': {
+        //16.16 fixed point: upper half from vr, lower half from the next register
+        s32 partInt = (s32)((u32)vr.u16(lane) << 16);
+        partInt |= vrNext.u16(lane);
+        f64 val = partInt;
+        val /= 65536.0f;
+
+        fprintf(stdout, "%.6f", val);
+        return;
+      }
+      }
+    };
+
+    //prints the register described by fmtReg / fmtLane using fmtType.
+    //unknown or out-of-range registers print nothing.
+    auto print_placeholder = [&]() -> void {
+      if(fmtReg.length() == 3 && fmtReg[0] == 'v') {
+        int regIdx = emuxParseNumber(fmtReg, 1);
+        if(regIdx < 0 || regIdx > 31) return;
+
+        int lane = -1;  //-1 prints all eight lanes
+        if(fmtLane.length() >= 2) {
+          lane = emuxParseNumber(fmtLane, 1);
+          if(lane >= 0) lane &= 7;  //keep the lane inside the register
+        }
+
+        auto &vr = vpu.r[regIdx];
+        auto &vrNext = vpu.r[(regIdx + 1) & 31];  //wraps so v31 never indexes past the last register
+
+        if(lane < 0) {
+          for(int l = 0; l < 7; ++l) {
+            print_lane(vr, vrNext, l, fmtType);
+            fputc(' ', stdout);
+          }
+          print_lane(vr, vrNext, 7, fmtType);
+        } else {
+          print_lane(vr, vrNext, lane, fmtType);
+        }
+        return;
+      }
+
+      for(u32 r : range(32)) {
+        if(fmtReg == std::string_view{mips_reg_names[r]}) {
+          if(fmtType == 'x') fprintf(stdout, "%08X", ipu.r[r].u32);
+          else if(fmtType == 'u') fprintf(stdout, "%u", ipu.r[r].u32);
+          else fprintf(stdout, "%d", ipu.r[r].s32);
+          break;
+        }
+      }
+    };
+
+    u32 i = 0;
+    do {
+      char c = dmem.read(rt.u32 + i);
+
+      if(stage == FormatStage::SEARCH_MARKER && c == '%') {
+        stage = FormatStage::IN_FORMAT;
+        continue;
+      }
+
+      if(stage == FormatStage::IN_FORMAT) {
+        if(c == '\0') break;  //string ends right after '%'; do not read past it
+        fmtType = c;
+        stage = FormatStage::IN_PLACEHOLDER;
+        continue;
+      }
+
+      if(stage == FormatStage::IN_PLACEHOLDER || stage == FormatStage::IN_LANE) {
+        bool isPlaceholderChar = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '.';
+
+        if(isPlaceholderChar) {
+          if(c == '.') stage = FormatStage::IN_LANE;
+          else if(stage == FormatStage::IN_LANE) fmtLane += c;
+          else fmtReg += c;
+          continue;
+        }
+
+        //the placeholder is complete: print it, then step back so c is processed again
+        print_placeholder();
+        fmtReg = "";
+        fmtLane = "";
+        fmtType = 'x';
+        stage = FormatStage::SEARCH_MARKER;
+        i--;
+        continue;
+      }
+
+      if (c == '\0') break;
+      fputc(c, stdout);
+    } while (++i < 4096);
+  } break;
+  case 0x40: case 0x48: {  // dump_regs(gpr); bit 3 selects decimal output
+    n32 mask = rt.u32;
+    bool decimal = code & 0x8;
+    bool partial = false;
+    for (u32 i : range(32)) {
+      if (mask && !mask.bit(i)) continue;
+      u32 val = ipu.r[i].u32;
+      //each layout gets matching arguments: one signed value, or two 16-bit halves
+      if (decimal) fprintf(stdout, "%s: %-12d", mips_reg_names[i], (s32)val);
+      else fprintf(stdout, "%s: %04x %04x", mips_reg_names[i], val >> 16, val & 0xFFFF);
+      if (i % 4 == 3) fputc('\n', stdout), partial = false;
+      else fputs(" ", stdout), partial = true;
+    }
+    if (partial) fputc('\n', stdout);
+  } break;
+  case 0x41: case 0x44: {  // dump_regs(cop0)
+    //NOTE(review): 0x44 does not have bit 3 set, so the decimal layout below is
+    //unreachable; the gpr/cop2 pattern suggests 0x49 may have been intended - confirm.
+    static const char *cop0_reg_names[16] = {
+      "dma_spaddr", "dma_ramaddr", "dma_read", "dma_write",
+      "sp_status", "dma_full", "dma_busy", "semaphore",
+      "dp_start", "dp_end", "dp_current", "dp_status",
+      "dp_clock", "dp_busy", "dp_pipe_busy", "dp_tmem_busy"
+    };
+    n32 mask = rt.u32;
+    bool decimal = code & 0x8;
+    bool partial = false;
+    Thread dummyThread{};
+    for (u32 i : range(16)) {
+      if (mask && !mask.bit(i)) continue;
+      u32 val;
+      if (i == 7) val = status.semaphore;  // avoid side effects
+      else val = (i & 8) ? Nintendo64::rdp.readWord(i & 7, *this) : Nintendo64::rsp.ioRead(i & 7, dummyThread);
+      if (decimal) fprintf(stdout, "%s: %-12d", cop0_reg_names[i], (s32)val);
+      else fprintf(stdout, "%s: %04x %04x", cop0_reg_names[i], val >> 16, val & 0xFFFF);
+      if (i % 4 == 3) fputc('\n', stdout), partial = false;
+      else fputs(" ", stdout), partial = true;
+    }
+    if (partial) fputc('\n', stdout);
+  } break;
+  case 0x42: case 0x46: case 0x4A: case 0x4C: {  // dump_regs(cop2)
+    //bit 3 selects decimal output; bit 2 additionally dumps accumulators and flags
+    auto dump_vr = [&](r128& vr) {
+      for (auto i : range(8)) {
+        if (code & 0x8) fprintf(stdout, "%-6d", vr.s16(i));
+        else fprintf(stdout, "%04x", vr.u16(i));
+        if (i < 7) fputc(' ', stdout);
+        if (i == 3) fputc(' ', stdout);
+      }
+    };
+
+    n32 mask = rt.u32;
+    bool partial = false;
+    for (u32 i : range(32)) {
+      if (mask && !mask.bit(i)) continue;
+      fprintf(stdout, "v%02d: ", i);
+      dump_vr(vpu.r[i]);
+      if (i % 2 == 1) fputc('\n', stdout), partial = false;
+      else fputs(" ", stdout), partial = true;
+    }
+    if (partial) fputc('\n', stdout);
+    if (code & 0x4) {
+      r128* accs[3] = { &vpu.acch, &vpu.accm, &vpu.accl };
+      const char *accnames[] = { "acch", "accm", "accl" };
+
+      auto dump_vc8 = [&](r128& vc) {
+        for (auto i : range(8)) {
+          fprintf(stdout, "%c", vc.get(i) ? '1' : '-');
+          if (i == 3) fputc(' ', stdout);
+        }
+      };
+      auto dump_vc16 = [&](r128& vch, r128& vcl) {
+        dump_vc8(vch);
+        fputc(' ', stdout);
+        dump_vc8(vcl);
+      };
+
+      for (auto i : range(3)) {
+        fprintf(stdout, "%s: ", accnames[i]);
+        dump_vr(*accs[i]);
+        fputs(" ", stdout);
+        switch (i) {
+        case 0: fprintf(stdout, "vco: "); dump_vc16(vpu.vcoh, vpu.vcol); fprintf(stdout, "\n"); break;
+        case 1: fprintf(stdout, "vcc: "); dump_vc16(vpu.vcch, vpu.vccl); fprintf(stdout, "\n"); break;
+        case 2: fprintf(stdout, "vce: "); dump_vc8(vpu.vce); fprintf(stdout, "\n"); break;
+        }
+      }
+    }
+  } break;
+  default:
+    printf("[emux] unknown emux code: %08x\n", code);
+    break;
+  }
+}
diff --git a/ares/n64/rsp/interpreter.cpp b/ares/n64/rsp/interpreter.cpp
index f8b363b4fe..b5c0124550 100644
--- a/ares/n64/rsp/interpreter.cpp
+++ b/ares/n64/rsp/interpreter.cpp
@@ -34,6 +34,7 @@
 #define RDn (OP >> 11 & 31)
 #define RTn (OP >> 16 & 31)
 #define RSn (OP >> 21 & 31)
+#define CODE (OP >> 6 & 1023)
 #define VDn (OP >> 6 & 31)
 #define VSn (OP >> 11 & 31)
 #define VTn (OP >> 16 & 31)
@@ -166,7 +167,7 @@ auto RSP::interpreterSPECIAL() -> void {
   op(0x33, INVALID);  //TLTU
   op(0x34, INVALID);  //TEQ
   op(0x35, INVALID);
-  op(0x36, INVALID);  //TNE
+  op(0x36, TNE, RS, RT, CODE);  //TNE
   op(0x37, INVALID);
   op(0x38, INVALID);  //DSLL
   op(0x39, INVALID);
diff --git a/ares/n64/rsp/recompiler.cpp b/ares/n64/rsp/recompiler.cpp
index 85468c2b0e..fb62bd6fcb 100644
--- a/ares/n64/rsp/recompiler.cpp
+++ b/ares/n64/rsp/recompiler.cpp
@@ -129,6 +129,7 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
 #define Vdn (instruction >> 6 & 31)
 #define Vsn (instruction >> 11 & 31)
 #define Vtn (instruction >> 16 & 31)
+#define Code (instruction >> 6 & 1023)
 #define Rd sreg(1), offsetof(IPU, r) + Rdn * sizeof(r32)
 #define Rt sreg(1), offsetof(IPU, r) + Rtn * sizeof(r32)
 #define Rs sreg(1), offsetof(IPU, r) + Rsn * sizeof(r32)
@@ -547,7 +548,21 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool {
   }
 
   //INVALID
-  case range20(0x2c, 0x3f): {
+  case range10(0x2c, 0x35): {
+    return 0;
+  }
+
+  //TNE Rs,Rt,Code
+  case 0x36: {
+    lea(reg(1), Rs);
+    lea(reg(2), Rt);
+    mov32(reg(3), imm(Code));
+    call(&RSP::TNE);
+    return 0;
+  }
+
+  //INVALID
+  case range9(0x37, 0x3f): {
     return 0;
   }
 
@@ -1486,6 +1501,7 @@ auto RSP::Recompiler::isTerminal(u32 instruction) -> bool {
 #undef Vd
 #undef Vs
 #undef Vt
+#undef Code
 #undef i16
 #undef n16
 #undef n26
diff --git a/ares/n64/rsp/rsp.cpp b/ares/n64/rsp/rsp.cpp
index 27deac0c7f..fe84ec4f38 100644
--- a/ares/n64/rsp/rsp.cpp
+++ b/ares/n64/rsp/rsp.cpp
@@ -14,6 +14,7 @@ RSP rsp;
 #include "debugger.cpp"
 #include "serialization.cpp"
 #include "disassembler.cpp"
+#include "emux.cpp"
 
 auto RSP::load(Node::Object parent) -> void {
   node = parent->append("RSP");
@@ -43,6 +44,7 @@ auto RSP::instruction() -> void {
   }
 
   if constexpr(Accuracy::RSP::Interpreter) {
+    pipeline.dblIssueCount = 0;
     u32 instruction = imem.read(ipu.pc);
     instructionPrologue(instruction);
     pipeline.begin();
@@ -55,6 +57,7 @@ auto RSP::instruction() -> void {
     OpInfo op1 = decoderEXECUTE(instruction);
 
     if(canDualIssue(op0, op1)) {
+      pipeline.dblIssueCount = 1;
       instructionEpilogue(0);
       instructionPrologue(instruction);
       pipeline.issue(op1);
@@ -69,6 +72,7 @@ auto RSP::instruction() -> void {
   //this handles all stepping for the interpreter
   //with the recompiler, it only steps for taken branch stalls
   step(pipeline.clocks);
+  pipeline.clocksTotal += pipeline.clocks;
 }
 
 auto RSP::instructionPrologue(u32 instruction) -> void {
@@ -80,6 +84,7 @@ auto RSP::instructionPrologue(u32 instruction) -> void {
 auto RSP::instructionEpilogue(u32 clocks) -> s32 {
   if constexpr(Accuracy::RSP::Recompiler) {
     step(clocks);
+    pipeline.clocksTotal += clocks;
   }
 
   ipu.r[0].u32 = 0;
diff --git a/ares/n64/rsp/rsp.hpp b/ares/n64/rsp/rsp.hpp
index cb4f2ea45c..1e6ac7c238 100644
--- a/ares/n64/rsp/rsp.hpp
+++ b/ares/n64/rsp/rsp.hpp
@@ -68,6 +68,7 @@ struct RSP : Thread, Memory::RCP {
     struct Tracer {
       Node::Debugger::Tracer::Instruction instruction;
       Node::Debugger::Tracer::Notification io;
+      i32 instructionCountdown = 0;  //when non-zero, tracing is disabled once this reaches zero
     } tracer;
   } debugger;
 
@@ -117,6 +118,11 @@ struct RSP : Thread, Memory::RCP {
     u32 address;
     u32 instruction;
     u32 clocks;
+
+    u32 clocksTotal;    //running sum of stepped clocks; zeroed by EMUX trace(start)
+    u32 stallCount;     //reset by begin(); incremented with each 3-clock penalty
+    u32 dblIssueCount;  //reset by begin(); set to 1 when the instruction was dual-issued
+
     u1 singleIssue;
 
     struct Stage {
@@ -145,6 +151,8 @@ struct RSP : Thread, Memory::RCP {
 
     auto begin() -> void {
       clocks = 0;
+      stallCount = 0;
+      dblIssueCount = 0;
     }
 
     auto end() -> void {
@@ -165,6 +173,7 @@ struct RSP : Thread, Memory::RCP {
       previous[1] = previous[0];
       previous[0] = {};
       clocks += 3;
+      ++stallCount;
     }
 
     auto issue(const OpInfo& op) -> void {
@@ -476,6 +485,13 @@ struct RSP : Thread, Memory::RCP {
   template auto VXOR(r128& rd, cr128& vs, cr128& vt) -> void;
   template auto VZERO(r128& rd, cr128& vs, cr128& vt) -> void;
 
+  //emux.cpp
+  auto TNE(cr32& rs, cr32& rt, u32 code) -> void;
+  auto EMUX(cr32& rt, u32 code) -> void;
+  struct {
+    u32 buflen;
+  } emux;
+
   //unserialized:
   u16 reciprocals[512];
   u16 inverseSquareRoots[512];