diff --git a/Makefile b/Makefile index 789c92666e..bc56130c20 100755 --- a/Makefile +++ b/Makefile @@ -70,6 +70,7 @@ install-mk: n64.mk install: install-mk libdragon install -Cv -m 0644 libdragon.a $(INSTALLDIR)/mips64-elf/lib/libdragon.a install -Cv -m 0644 n64.ld $(INSTALLDIR)/mips64-elf/lib/n64.ld + install -Cv -m 0644 rsp.ld $(INSTALLDIR)/mips64-elf/lib/rsp.ld install -Cv -m 0644 header $(INSTALLDIR)/mips64-elf/lib/header install -Cv -m 0644 libdragonsys.a $(INSTALLDIR)/mips64-elf/lib/libdragonsys.a install -Cv -m 0644 include/pputils.h $(INSTALLDIR)/mips64-elf/include/pputils.h diff --git a/include/pputils.h b/include/pputils.h index ec18d13278..ec3d4b2a09 100644 --- a/include/pputils.h +++ b/include/pputils.h @@ -63,6 +63,42 @@ #define __PPCAT2(n,x) n ## x #define __PPCAT(n,x) __PPCAT2(n,x) +// __CALL_FOREACH_BIS. Like __CALL_FOREACH, but it allows to be called without the expansion +// of a __CALL_FOREACH. +#define __FEB_0(_call, ...) +#define __FEB_1(_call, x) _call(x) +#define __FEB_2(_call, x, ...) _call(x) __FEB_1(_call, __VA_ARGS__) +#define __FEB_3(_call, x, ...) _call(x) __FEB_2(_call, __VA_ARGS__) +#define __FEB_4(_call, x, ...) _call(x) __FEB_3(_call, __VA_ARGS__) +#define __FEB_5(_call, x, ...) _call(x) __FEB_4(_call, __VA_ARGS__) +#define __FEB_6(_call, x, ...) _call(x) __FEB_5(_call, __VA_ARGS__) +#define __FEB_7(_call, x, ...) _call(x) __FEB_6(_call, __VA_ARGS__) +#define __FEB_8(_call, x, ...) _call(x) __FEB_7(_call, __VA_ARGS__) +#define __FEB_9(_call, x, ...) _call(x) __FEB_8(_call, __VA_ARGS__) +#define __FEB_10(_call, x, ...) _call(x) __FEB_9(_call, __VA_ARGS__) +#define __FEB_11(_call, x, ...) _call(x) __FEB_10(_call, __VA_ARGS__) +#define __FEB_12(_call, x, ...) _call(x) __FEB_11(_call, __VA_ARGS__) +#define __FEB_13(_call, x, ...) _call(x) __FEB_12(_call, __VA_ARGS__) +#define __FEB_14(_call, x, ...) _call(x) __FEB_13(_call, __VA_ARGS__) +#define __FEB_15(_call, x, ...) 
_call(x) __FEB_14(_call, __VA_ARGS__) +#define __FEB_16(_call, x, ...) _call(x) __FEB_15(_call, __VA_ARGS__) +#define __FEB_17(_call, x, ...) _call(x) __FEB_16(_call, __VA_ARGS__) +#define __FEB_18(_call, x, ...) _call(x) __FEB_17(_call, __VA_ARGS__) +#define __FEB_19(_call, x, ...) _call(x) __FEB_18(_call, __VA_ARGS__) +#define __FEB_20(_call, x, ...) _call(x) __FEB_19(_call, __VA_ARGS__) +#define __FEB_21(_call, x, ...) _call(x) __FEB_20(_call, __VA_ARGS__) +#define __FEB_22(_call, x, ...) _call(x) __FEB_21(_call, __VA_ARGS__) +#define __FEB_23(_call, x, ...) _call(x) __FEB_22(_call, __VA_ARGS__) +#define __FEB_24(_call, x, ...) _call(x) __FEB_23(_call, __VA_ARGS__) +#define __FEB_25(_call, x, ...) _call(x) __FEB_24(_call, __VA_ARGS__) +#define __FEB_26(_call, x, ...) _call(x) __FEB_25(_call, __VA_ARGS__) +#define __FEB_27(_call, x, ...) _call(x) __FEB_26(_call, __VA_ARGS__) +#define __FEB_28(_call, x, ...) _call(x) __FEB_27(_call, __VA_ARGS__) +#define __FEB_29(_call, x, ...) _call(x) __FEB_28(_call, __VA_ARGS__) +#define __FEB_30(_call, x, ...) _call(x) __FEB_29(_call, __VA_ARGS__) +#define __FEB_31(_call, x, ...) _call(x) __FEB_30(_call, __VA_ARGS__) +#define __CALL_FOREACH_BIS(fn, ...) 
__GET_33RD_ARG("ignored", ##__VA_ARGS__, __FEB_31, __FEB_30, __FEB_29, __FEB_28, __FEB_27, __FEB_26, __FEB_25, __FEB_24, __FEB_23, __FEB_22, __FEB_21, __FEB_20, __FEB_19, __FEB_18, __FEB_17, __FEB_16, __FEB_15, __FEB_14, __FEB_13, __FEB_12, __FEB_11, __FEB_10, __FEB_9, __FEB_8, __FEB_7, __FEB_6, __FEB_5, __FEB_4, __FEB_3, __FEB_2, __FEB_1, __FEB_0)(fn, ##__VA_ARGS__) + /// @endcond #endif diff --git a/include/rsp.inc b/include/rsp.inc index 37b2e6c789..326694ff70 100644 --- a/include/rsp.inc +++ b/include/rsp.inc @@ -1160,11 +1160,20 @@ makeMxc2Op mfc2, 0x0 .align 4 V_SHIFT: .half 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1 - .macro setup_vsll vshiftreg - .set noat - la $1,%lo(V_SHIFT) - lqv \vshiftreg,0, 0,$1 - .set at + .macro setup_vsll vshiftreg, emitload + .ifnb \emitload + .ifgt \emitload + .set noat + la $1,%lo(V_SHIFT) + lqv \vshiftreg,0, 0,$1 + .set at + .endif + .else + .set noat + la $1,%lo(V_SHIFT) + lqv \vshiftreg,0, 0,$1 + .set at + .endif .macro vsll vdstreg, vsrcreg, qty .if (\qty == 7) @@ -1241,11 +1250,21 @@ makeMxc2Op mfc2, 0x0 .align 4 V_SHIFT8: .half 0x8000, 0x4000, 0x2000, 0x1000, 0x800, 0x400, 0x200, 0x100 - .macro setup_vsll8 vshiftreg - .set noat - la $1,%lo(V_SHIFT8) - lqv \vshiftreg,0, 0,$1 - .set at + + .macro setup_vsll8 vshiftreg, emitload + .ifnb \emitload + .ifgt \emitload + .set noat + la $1,%lo(V_SHIFT8) + lqv \vshiftreg,0, 0,$1 + .set at + .endif + .else + .set noat + la $1,%lo(V_SHIFT8) + lqv \vshiftreg,0, 0,$1 + .set at + .endif .macro vsll8 vdstreg, vsrcreg, qty .if (\qty == 15) @@ -1504,6 +1523,24 @@ makeMxc2Op mfc2, 0x0 lui $1, \code .set at .endm + .macro assert_ge v0, v1, code + blt \v0, \v1, assertion_failed + .set noat + lui $1, \code + .set at + .endm + .macro assert_gt v0, v1, code + ble \v0, \v1, assertion_failed + .set noat + lui $1, \code + .set at + .endm + .macro assert_lt v0, v1, code + bge \v0, \v1, assertion_failed + .set noat + lui $1, \code + .set at + .endm #else .macro assert code @@ -1512,6 +1549,12 @@ 
makeMxc2Op mfc2, 0x0 .endm .macro assert_ne v0, v1, code .endm + .macro assert_ge v0, v1, code + .endm + .macro assert_gt v0, v1, code + .endm + .macro assert_lt v0, v1, code + .endm #endif #endif /* RSP_INC */ diff --git a/include/rsp_queue.inc b/include/rsp_queue.inc index ab9cac2f29..6466ee8a0c 100644 --- a/include/rsp_queue.inc +++ b/include/rsp_queue.inc @@ -167,6 +167,26 @@ _RSPQ_SAVED_STATE_END: # The total command size needs to be specified as well. #define CMD_ADDR(offset, cmdsize) (%lo(RSPQ_DMEM_BUFFER) + (offset) - (cmdsize)) (rspq_dmem_buf_ptr) +# This register is initialized to zero any time a command is called +#define vzero $v00 + +# These registers are initialized with the constant data required to make +# the vector shift macros work (powers of two). +#define vshift $v30 +#define vshift8 $v31 + +# We also define direct access to small constants as they can be useful in some +# calculations. +#define K1 vshift,e(7) +#define K2 vshift,e(6) +#define K4 vshift,e(5) +#define K8 vshift,e(4) +#define K16 vshift,e(3) +#define K32 vshift,e(2) +#define K64 vshift,e(1) +#define K128 vshift,e(0) + + ######################################################## # # The following is the actual implementation of the rsp engine. @@ -185,6 +205,11 @@ _RSPQ_SAVED_STATE_END: .data _data_start: +# Data for vector shift registers. +# We put this at the top of the DMEM as we need an absolute address to save one opcode. + vsll_data + vsll8_data + # Overlay tables. See rsp_overlay_t in rsp.c RSPQ_OVERLAY_TABLE: .ds.b RSPQ_OVERLAY_TABLE_SIZE RSPQ_OVERLAY_DESCRIPTORS: .ds.b (RSPQ_OVERLAY_DESC_SIZE * RSPQ_MAX_OVERLAY_COUNT) @@ -232,6 +257,10 @@ _ovl_data_start: .text + # Just declare the shift macros, without emitting code. 
We will be emitting it later + setup_vsll vshift, 0 + setup_vsll8 vshift8, 0 + .globl _start _start: li rspq_dmem_buf_ptr, 0 @@ -381,6 +410,11 @@ rspq_execute_command: lw a3, %lo(RSPQ_DMEM_BUFFER) + 0xC (rspq_dmem_buf_ptr) add rspq_dmem_buf_ptr, rspq_cmd_size + # Initialize vzero, vshift, vshift8. + vxor vzero, vzero,0 + lqv vshift, 0x00,zero + lqv vshift8, 0x10,zero + # Jump to command. Set ra to the loop function, so that commands can # either do "j RSPQ_Loop" or "jr ra" (or a tail call) to get back to the main loop sll cmd_desc, 2 diff --git a/n64.mk b/n64.mk index a87517d476..426820e64e 100644 --- a/n64.mk +++ b/n64.mk @@ -107,7 +107,8 @@ $(BUILD_DIR)/%.o: $(SOURCE_DIR)/%.S DATASECTION="$(basename $@).data"; \ BINARY="$(basename $@).elf"; \ echo " [RSP] $<"; \ - $(N64_CC) $(RSPASFLAGS) -nostartfiles -Wl,-Ttext=0x1000 -Wl,-Tdata=0x0 -Wl,-e0x1000 -o $$BINARY $<; \ + $(N64_CC) $(RSPASFLAGS) -nostartfiles -Wl,-Trsp.ld -Wl,--gc-sections -o $@ $<; \ + mv "$@" $$BINARY; \ $(N64_OBJCOPY) -O binary -j .text $$BINARY $$TEXTSECTION.bin; \ $(N64_OBJCOPY) -O binary -j .data $$BINARY $$DATASECTION.bin; \ $(N64_OBJCOPY) -I binary -O elf32-bigmips -B mips4300 \ diff --git a/rsp.ld b/rsp.ld new file mode 100644 index 0000000000..e1490f0aa3 --- /dev/null +++ b/rsp.ld @@ -0,0 +1,50 @@ +/* + * rsp.ld: Linker script for rsp ucode. + */ + +OUTPUT_FORMAT ("elf32-bigmips", "elf32-bigmips", "elf32-littlemips") +OUTPUT_ARCH (mips) +ENTRY (_start) + +MEMORY +{ + /* This is the layout in ROM. */ + rom_dmem : ORIGIN = 0x0000, LENGTH = 0x1000 + rom_imem : ORIGIN = 0x1000, LENGTH = 0x1000 + + /* This is a workaround to make ld place text symbols at the correct addresses (0x0 - 0x1000). + The RSP technically uses a harvard-architecture (https://en.wikipedia.org/wiki/Harvard_architecture) + which means that it uses different address spaces for instructions and data accesses. 
+ Because ld is not designed for such architectures, we need to place the data section somewhere different, + since it would otherwise overlap the text section. As a workaround, we place it at 0xA4000000 (which is also + the location of DMEM from the VR4300's point of view, via the uncached KSEG1 segment). Because the RSP only uses the lower 12 bits + of any address, this works out fine (as long as we always wrap data addresses in "%lo()"). + + Note that this is not actually required to run the ucode correctly (instruction addresses above 0x1000 are truncated anyway), + but it makes debugging with gdb a lot easier (e.g. using this fork of cen64 https://github.com/lambertjamesd/cen64). + */ + ram_data : ORIGIN = 0xA4000000, LENGTH = 0x1000 + ram_text : ORIGIN = 0x00000000, LENGTH = 0x1000 +} + +SECTIONS +{ + .text : { + KEEP(*(.text)) + *(.text.*) + } > ram_text AT > rom_imem + + .data : { + KEEP(*(.data)) + *(.data.*) + } > ram_data AT > rom_dmem + + . = ALIGN(8); + + .bss : { + KEEP(*(.bss)) + *(.bss.*) + } > ram_data AT > rom_dmem + + /DISCARD/ : { *(.MIPS.abiflags) } +} diff --git a/src/audio/rsp_mixer.S b/src/audio/rsp_mixer.S index 851ebda31b..9023658ddb 100644 --- a/src/audio/rsp_mixer.S +++ b/src/audio/rsp_mixer.S @@ -190,9 +190,6 @@ VCONST_1: #define k_alpha v_const1.e1 #define k_1malpha v_const1.e2 - vsll_data - vsll8_data - .align 4 BANNER0: .ascii "Dragon RSP Audio" BANNER1: .ascii " Coded by Rasky " @@ -270,9 +267,6 @@ OUTPUT_AREA: .dcb.w MAX_SAMPLES_PER_LOOP*2 command_exec: - setup_vsll v_shift - setup_vsll8 v_shift8 - #define samples_left t4 #define outptr s8 diff --git a/src/display.c b/src/display.c index 25ee37777a..bf06051128 100644 --- a/src/display.c +++ b/src/display.c @@ -137,7 +137,7 @@ static void __write_dram_register( void const * const dram_val ) { volatile uint32_t *reg_base = (uint32_t *)REGISTER_BASE; - reg_base[1] = (uint32_t)dram_val; + reg_base[1] = PhysicalAddr(dram_val); MEMORY_BARRIER(); } @@ -169,6 +169,7 @@ static void __display_callback() /* Least 
significant bit of the current line register indicates if the currently displayed field is odd or even. */ bool field = reg_base[4] & 1; + bool interlaced = reg_base[0] & (1<<6); /* Check if the next buffer is ready to be displayed, otherwise just leave up the current frame */ @@ -178,7 +179,7 @@ static void __display_callback() ready_mask &= ~(1 << next); } - __write_dram_register(__safe_buffer[now_showing] + (!field ? __width * __bitdepth : 0)); + __write_dram_register(__safe_buffer[now_showing] + (interlaced && !field ? __width * __bitdepth : 0)); } void display_init( resolution_t res, bitdepth_t bit, uint32_t num_buffers, gamma_t gamma, antialias_t aa ) @@ -327,7 +328,7 @@ void display_init( resolution_t res, bitdepth_t bit, uint32_t num_buffers, gamma to avoid confusing the VI chip with in-frame modifications. */ if ( __is_vi_active() ) { __wait_for_vblank(); } - registers[1] = (uintptr_t) __safe_buffer[0]; + registers[1] = PhysicalAddr(__safe_buffer[0]); __write_registers( registers ); enable_interrupts(); diff --git a/src/utils.h b/src/utils.h index f506d7c6ac..5310fec2ed 100644 --- a/src/utils.h +++ b/src/utils.h @@ -5,8 +5,11 @@ * Misc utilities functions and macros. Internal header. */ +#define SWAP(a, b) ({ typeof(a) t = a; a = b; b = t; }) + #define MAX(a,b) ({ typeof(a) _a = a; typeof(b) _b = b; _a > _b ? _a : _b; }) #define MIN(a,b) ({ typeof(a) _a = a; typeof(b) _b = b; _a < _b ? 
_a : _b; }) +#define CLAMP(x, min, max) (MIN(MAX((x), (min)), (max))) /** Round n up to the next multiple of d */ #define ROUND_UP(n, d) ({ \ diff --git a/tests/testrom.c b/tests/testrom.c index e39853bbc6..c8cdcf829c 100644 --- a/tests/testrom.c +++ b/tests/testrom.c @@ -18,8 +18,8 @@ typedef struct { int result; - char *log; - int logleft; + char *log; char *err; + int logleft, errleft; } TestContext; typedef void (*TestFunc)(TestContext *ctx); @@ -30,10 +30,15 @@ typedef void (*TestFunc)(TestContext *ctx); // LOG(msg, ...): log something that will be displayed if the test fails. #define LOG(msg, ...) ({ \ int __n = snprintf(ctx->log, ctx->logleft, msg, ##__VA_ARGS__); \ - fwrite(ctx->log, 1, __n, stderr); \ ctx->log += __n; ctx->logleft -= __n; \ }) +// ERR(msg, ...): generate an error message (just before failing the test) +#define ERR(msg, ...) ({ \ + int __n = snprintf(ctx->err, ctx->errleft, msg, ##__VA_ARGS__); \ + ctx->err += __n; ctx->errleft -= __n; \ +}) + // DEFER(stmt): execute "stmt" statement when the current lexical block exits. // This is useful in tests to execute cleanup functions even if the test fails // through ASSERT macros. @@ -44,8 +49,8 @@ typedef void (*TestFunc)(TestContext *ctx); // SKIP: skip execution of the test. #define SKIP(msg, ...) ({ \ - LOG("TEST SKIPPED:\n"); \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR("TEST SKIPPED:\n"); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_SKIPPED; \ return; \ }) @@ -60,6 +65,9 @@ static uint32_t rand(void) { return rand_state = x; } +// SRAND(n): set seed for random number generator +#define SRAND(n) ({ rand_state = (n); if (!rand_state) rand_state = 1; }) + // RANDN(n): generate a random number from 0 to n-1 #define RANDN(n) ({ \ __builtin_constant_p((n)) ? \ @@ -70,9 +78,9 @@ static uint32_t rand(void) { // ASSERT(cond, msg): fail the test if the condition is false (with log message) #define ASSERT(cond, msg, ...) 
({ \ if (!(cond)) { \ - LOG("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ - LOG("%s\n", #cond); \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ + ERR("%s\n", #cond); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_FAILED; \ return; \ } \ @@ -82,9 +90,9 @@ static uint32_t rand(void) { #define ASSERT_EQUAL_HEX(_a, _b, msg, ...) ({ \ uint64_t a = _a; uint64_t b = _b; \ if (a != b) { \ - LOG("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ - LOG("%s != %s (0x%llx != 0x%llx)\n", #_a, #_b, a, b); \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ + ERR("%s != %s (0x%llx != 0x%llx)\n", #_a, #_b, a, b); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_FAILED; \ return; \ } \ @@ -95,9 +103,9 @@ static uint32_t rand(void) { #define ASSERT_EQUAL_UNSIGNED(_a, _b, msg, ...) ({ \ uint64_t a = _a; uint64_t b = _b; \ if (a != b) { \ - LOG("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ - LOG("%s != %s (%llu != %llu)\n", #_a, #_b, a, b); \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ + ERR("%s != %s (%llu != %llu)\n", #_a, #_b, a, b); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_FAILED; \ return; \ } \ @@ -107,9 +115,9 @@ static uint32_t rand(void) { #define ASSERT_EQUAL_SIGNED(_a, _b, msg, ...) 
({ \ int64_t a = _a; int64_t b = _b; \ if (a != b) { \ - LOG("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ - LOG("%s != %s (%lld != %lld)\n", #_a, #_b, a, b); \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR("ASSERTION FAILED (%s:%d):\n", __FILE__, __LINE__); \ + ERR("%s != %s (%lld != %lld)\n", #_a, #_b, a, b); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_FAILED; \ return; \ } \ @@ -135,9 +143,9 @@ int assert_equal_mem(TestContext *ctx, const char *file, int line, const uint8_t hexdump(dumpa, a, len, i-2, 5); hexdump(dumpb, b, len, i-2, 5); - LOG("ASSERTION FAILED (%s:%d):\n", file, line); \ - LOG("[%s] != [%s]\n", dumpa, dumpb); - LOG(" ^^ ^^ idx: %d\n", i); + ERR("ASSERTION FAILED (%s:%d):\n", file, line); \ + ERR("[%s] != [%s]\n", dumpa, dumpb); + ERR(" ^^ ^^ idx: %d\n", i); return 0; } } @@ -149,7 +157,7 @@ int assert_equal_mem(TestContext *ctx, const char *file, int line, const uint8_t #define ASSERT_EQUAL_MEM(_a, _b, _len, msg, ...) ({ \ const uint8_t *a = (_a); const uint8_t *b = (_b); int len = (_len); \ if (!assert_equal_mem(ctx, __FILE__, __LINE__, a, b, len)) { \ - LOG(msg "\n", ##__VA_ARGS__); \ + ERR(msg "\n", ##__VA_ARGS__); \ ctx->result = TEST_FAILED; \ return; \ } \ @@ -250,7 +258,7 @@ int main() { const int NUM_TESTS = sizeof(tests) / sizeof(tests[0]); uint32_t start = TICKS_READ(); for (int i=0; i < NUM_TESTS; i++) { - static char logbuf[16384]; + static char logbuf[16384], errbuf[4096]; printf("%-59s", tests[i].name); fflush(stdout); @@ -269,6 +277,8 @@ int main() { TestContext ctx; ctx.log = logbuf; ctx.logleft = sizeof(logbuf); + ctx.err = errbuf; + ctx.errleft = sizeof(errbuf); ctx.result = TEST_SUCCESS; rand_state = 1; // reset to be fully reproducible @@ -299,9 +309,12 @@ int main() { if (ctx.result == TEST_FAILED) { failures++; printf("FAIL\n\n"); - if (ctx.log != logbuf) { - printf("%s\n\n", logbuf); + debugf("%s\n", logbuf); + } + if (ctx.err != errbuf) { + printf("%s\n", errbuf); + debugf("%s\n", errbuf); } } else if 
(ctx.result == TEST_SKIPPED) { skipped++;