From 4a6e247ddaec27886031863c44388aa278c0bf55 Mon Sep 17 00:00:00 2001
From: Zane van Iperen
Date: Sat, 18 May 2024 18:32:42 +1000
Subject: [PATCH] core/math/fixed: rewrite BrFixed{Sin,Cos}() to not use float intermediates

This is based on the original fixed386.asm
---
 core/math/fixed.c | 105 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 103 insertions(+), 2 deletions(-)

diff --git a/core/math/fixed.c b/core/math/fixed.c
index 4bfadf12..dea53f8b 100644
--- a/core/math/fixed.c
+++ b/core/math/fixed.c
@@ -10,6 +10,16 @@
 #include "brender.h"
 #include "brmath.h"
 
+/*
+ * Sine/cosine lookup tables, defined at the end of this file.
+ * 257 entries (256 + 1) so interp() can read table[index + 1] without a modulo.
+ *
+ * Stored unsigned because a hex literal above 0x7FFF does not fit a
+ * signed 16-bit initialiser; interp() casts entries back to br_int_16.
+ */
+static const br_uint_16 sin_table[257];
+static const br_uint_16 cos_table[257];
+
 br_fixed_ls BR_PUBLIC_ENTRY BrFixedAbs(br_fixed_ls a)
 {
     if(a < 0)
@@ -68,14 +78,31 @@ br_fixed_ls BR_PUBLIC_ENTRY BrFixedDiv(br_fixed_ls numerator, br_fixed_ls denomi
     return (br_fixed_ls)(((br_uint_64)numerator << 16) / denominator);
 }
 
+/*
+ * Evaluate a tabulated function: the high byte of the 16-bit input picks
+ * a table entry, the low byte linearly interpolates towards the next one.
+ *
+ * Table must have all deltas: -32768 <= delta <= 32767
+ */
+static br_int_16 interp(br_int_16 input, const br_uint_16 *table)
+{
+    unsigned index = (input & 0xFF00) >> 8; /* high byte: table slot, 0..255 */
+    br_int_16 base_value = (br_int_16)table[index];
+    br_int_16 next_value = (br_int_16)table[index + 1]; /* slot 255 reads the 257th entry */
+    int delta = next_value - base_value;
+    int scaled_delta = (delta * (input & 0xFF)) >> 8; /* assumes arithmetic >> when delta < 0 */
+
+    return (br_int_16)(base_value + scaled_delta);
+}
+
 br_fixed_ls BR_PUBLIC_ENTRY BrFixedSin(br_fixed_luf a)
 {
-    return BrScalarToFixed(BR_SIN(BrFixedLUFToScalar(a)));
+    return interp((br_int_16)a, sin_table) * 2;
 }
 
 br_fixed_ls BR_PUBLIC_ENTRY BrFixedCos(br_fixed_luf a)
 {
-    return BrScalarToFixed(BR_COS(BrFixedLUFToScalar(a)));
+    return interp((br_int_16)a, cos_table) * 2;
 }
 
 br_fixed_luf BR_PUBLIC_ENTRY BrFixedATan2(br_fixed_ls x, br_fixed_ls y)
@@ -95,3 +122,77 @@ br_fixed_ls BR_PUBLIC_ENTRY BrFixedPow(br_fixed_ls a, br_fixed_ls b)
 {
     return BrFloatToFixed(BrFloatPow(BrFixedToFloat(a), BrFixedToFloat(b)));
 }
+
+// clang-format off
+static const br_uint_16 sin_table[257] = {
+    0x00000, 0x00324, 0x00647, 0x0096A, 0x00C8B, 0x00FAB, 0x012C7, 0x015E1,
+    0x018F8, 0x01C0B, 0x01F19, 0x02223, 0x02527, 0x02826, 0x02B1E, 0x02E10,
+    0x030FB, 0x033DE, 0x036B9, 0x0398C, 0x03C56, 0x03F16, 0x041CD, 0x0447A,
+    0x0471C, 0x049B3, 0x04C3F, 0x04EBF, 0x05133, 0x0539A, 0x055F4, 0x05842,
+    0x05A81, 0x05CB3, 0x05ED6, 0x060EB, 0x062F1, 0x064E7, 0x066CE, 0x068A5,
+    0x06A6C, 0x06C23, 0x06DC9, 0x06F5E, 0x070E1, 0x07254, 0x073B5, 0x07503,
+    0x07640, 0x0776B, 0x07883, 0x07989, 0x07A7C, 0x07B5C, 0x07C29, 0x07CE2,
+    0x07D89, 0x07E1C, 0x07E9C, 0x07F08, 0x07F61, 0x07FA6, 0x07FD7, 0x07FF5,
+    0x07FFF, 0x07FF5, 0x07FD7, 0x07FA6, 0x07F61, 0x07F08, 0x07E9C, 0x07E1C,
+    0x07D89, 0x07CE2, 0x07C29, 0x07B5C, 0x07A7C, 0x07989, 0x07883, 0x0776B,
+    0x07640, 0x07503, 0x073B5, 0x07254, 0x070E1, 0x06F5E, 0x06DC9, 0x06C23,
+    0x06A6C, 0x068A5, 0x066CE, 0x064E7, 0x062F1, 0x060EB, 0x05ED6, 0x05CB3,
+    0x05A81, 0x05842, 0x055F4, 0x0539A, 0x05133, 0x04EBF, 0x04C3F, 0x049B3,
+    0x0471C, 0x0447A, 0x041CD, 0x03F16, 0x03C56, 0x0398C, 0x036B9, 0x033DE,
+    0x030FB, 0x02E10, 0x02B1E, 0x02826, 0x02527, 0x02223, 0x01F19, 0x01C0B,
+    0x018F8, 0x015E1, 0x012C7, 0x00FAB, 0x00C8B, 0x0096A, 0x00647, 0x00324,
+    0x00000, 0x0FCDC, 0x0F9B9, 0x0F696, 0x0F375, 0x0F055, 0x0ED39, 0x0EA1F,
+    0x0E708, 0x0E3F5, 0x0E0E7, 0x0DDDD, 0x0DAD9, 0x0D7DA, 0x0D4E2, 0x0D1F0,
+    0x0CF05, 0x0CC22, 0x0C947, 0x0C674, 0x0C3AA, 0x0C0EA, 0x0BE33, 0x0BB86,
+    0x0B8E4, 0x0B64D, 0x0B3C1, 0x0B141, 0x0AECD, 0x0AC66, 0x0AA0C, 0x0A7BE,
+    0x0A57F, 0x0A34D, 0x0A12A, 0x09F15, 0x09D0F, 0x09B19, 0x09932, 0x0975B,
+    0x09594, 0x093DD, 0x09237, 0x090A2, 0x08F1F, 0x08DAC, 0x08C4B, 0x08AFD,
+    0x089C0, 0x08895, 0x0877D, 0x08677, 0x08584, 0x084A4, 0x083D7, 0x0831E,
+    0x08277, 0x081E4, 0x08164, 0x080F8, 0x0809F, 0x0805A, 0x08029, 0x0800B,
+    0x08001, 0x0800B, 0x08029, 0x0805A, 0x0809F, 0x080F8, 0x08164, 0x081E4,
+    0x08277, 0x0831E, 0x083D7, 0x084A4, 0x08584, 0x08677, 0x0877D, 0x08895,
+    0x089C0, 0x08AFD, 0x08C4B, 0x08DAC, 0x08F1F, 0x090A2, 0x09237, 0x093DD,
+    0x09594, 0x0975B, 0x09932, 0x09B19, 0x09D0F, 0x09F15, 0x0A12A, 0x0A34D,
+    0x0A57F, 0x0A7BE, 0x0AA0C, 0x0AC66, 0x0AECD, 0x0B141, 0x0B3C1, 0x0B64D,
+    0x0B8E4, 0x0BB86, 0x0BE33, 0x0C0EA, 0x0C3AA, 0x0C674, 0x0C947, 0x0CC22,
+    0x0CF05, 0x0D1F0, 0x0D4E2, 0x0D7DA, 0x0DAD9, 0x0DDDD, 0x0E0E7, 0x0E3F5,
+    0x0E708, 0x0EA1F, 0x0ED39, 0x0F055, 0x0F375, 0x0F696, 0x0F9B9, 0x0FCDC,
+    0x00000,
+};
+
+static const br_uint_16 cos_table[257] = {
+    0x07FFF, 0x07FF5, 0x07FD7, 0x07FA6, 0x07F61, 0x07F08, 0x07E9C, 0x07E1C,
+    0x07D89, 0x07CE2, 0x07C29, 0x07B5C, 0x07A7C, 0x07989, 0x07883, 0x0776B,
+    0x07640, 0x07503, 0x073B5, 0x07254, 0x070E1, 0x06F5E, 0x06DC9, 0x06C23,
+    0x06A6C, 0x068A5, 0x066CE, 0x064E7, 0x062F1, 0x060EB, 0x05ED6, 0x05CB3,
+    0x05A81, 0x05842, 0x055F4, 0x0539A, 0x05133, 0x04EBF, 0x04C3F, 0x049B3,
+    0x0471C, 0x0447A, 0x041CD, 0x03F16, 0x03C56, 0x0398C, 0x036B9, 0x033DE,
+    0x030FB, 0x02E10, 0x02B1E, 0x02826, 0x02527, 0x02223, 0x01F19, 0x01C0B,
+    0x018F8, 0x015E1, 0x012C7, 0x00FAB, 0x00C8B, 0x0096A, 0x00647, 0x00324,
+    0x00000, 0x0FCDC, 0x0F9B9, 0x0F696, 0x0F375, 0x0F055, 0x0ED39, 0x0EA1F,
+    0x0E708, 0x0E3F5, 0x0E0E7, 0x0DDDD, 0x0DAD9, 0x0D7DA, 0x0D4E2, 0x0D1F0,
+    0x0CF05, 0x0CC22, 0x0C947, 0x0C674, 0x0C3AA, 0x0C0EA, 0x0BE33, 0x0BB86,
+    0x0B8E4, 0x0B64D, 0x0B3C1, 0x0B141, 0x0AECD, 0x0AC66, 0x0AA0C, 0x0A7BE,
+    0x0A57F, 0x0A34D, 0x0A12A, 0x09F15, 0x09D0F, 0x09B19, 0x09932, 0x0975B,
+    0x09594, 0x093DD, 0x09237, 0x090A2, 0x08F1F, 0x08DAC, 0x08C4B, 0x08AFD,
+    0x089C0, 0x08895, 0x0877D, 0x08677, 0x08584, 0x084A4, 0x083D7, 0x0831E,
+    0x08277, 0x081E4, 0x08164, 0x080F8, 0x0809F, 0x0805A, 0x08029, 0x0800B,
+    0x08001, 0x0800B, 0x08029, 0x0805A, 0x0809F, 0x080F8, 0x08164, 0x081E4,
+    0x08277, 0x0831E, 0x083D7, 0x084A4, 0x08584, 0x08677, 0x0877D, 0x08895,
+    0x089C0, 0x08AFD, 0x08C4B, 0x08DAC, 0x08F1F, 0x090A2, 0x09237, 0x093DD,
+    0x09594, 0x0975B, 0x09932, 0x09B19, 0x09D0F, 0x09F15, 0x0A12A, 0x0A34D,
+    0x0A57F, 0x0A7BE, 0x0AA0C, 0x0AC66, 0x0AECD, 0x0B141, 0x0B3C1, 0x0B64D,
+    0x0B8E4, 0x0BB86, 0x0BE33, 0x0C0EA, 0x0C3AA, 0x0C674, 0x0C947, 0x0CC22,
+    0x0CF05, 0x0D1F0, 0x0D4E2, 0x0D7DA, 0x0DAD9, 0x0DDDD, 0x0E0E7, 0x0E3F5,
+    0x0E708, 0x0EA1F, 0x0ED39, 0x0F055, 0x0F375, 0x0F696, 0x0F9B9, 0x0FCDC,
+    0x00000, 0x00324, 0x00647, 0x0096A, 0x00C8B, 0x00FAB, 0x012C7, 0x015E1,
+    0x018F8, 0x01C0B, 0x01F19, 0x02223, 0x02527, 0x02826, 0x02B1E, 0x02E10,
+    0x030FB, 0x033DE, 0x036B9, 0x0398C, 0x03C56, 0x03F16, 0x041CD, 0x0447A,
+    0x0471C, 0x049B3, 0x04C3F, 0x04EBF, 0x05133, 0x0539A, 0x055F4, 0x05842,
+    0x05A81, 0x05CB3, 0x05ED6, 0x060EB, 0x062F1, 0x064E7, 0x066CE, 0x068A5,
+    0x06A6C, 0x06C23, 0x06DC9, 0x06F5E, 0x070E1, 0x07254, 0x073B5, 0x07503,
+    0x07640, 0x0776B, 0x07883, 0x07989, 0x07A7C, 0x07B5C, 0x07C29, 0x07CE2,
+    0x07D89, 0x07E1C, 0x07E9C, 0x07F08, 0x07F61, 0x07FA6, 0x07FD7, 0x07FF5,
+    0x07FFF,
+};
+// clang-format on