From e1622af68c4de074175baa80e3d055f729eda481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Thu, 17 May 2018 13:16:46 +0300 Subject: [PATCH] Optimize llvm_cttz_i32() to remove the clumsy cttz_i8 lookup table to simplify code - (probably faster too, though did not benchmark; the arithmetic only form is greatly preferred anyway) --- emscripten.py | 5 ----- src/library.js | 28 ++-------------------------- tests/core/test_llvm_intrinsics.cpp | 10 ++++++++++ tests/core/test_llvm_intrinsics.out | 10 ++++++++++ tests/test_other.py | 2 +- 5 files changed, 23 insertions(+), 32 deletions(-) diff --git a/emscripten.py b/emscripten.py index 2ccc6c8dabcbe..0f4a8f28cdead 100755 --- a/emscripten.py +++ b/emscripten.py @@ -1259,11 +1259,6 @@ def create_basic_vars(exported_implemented_functions, forwarded_json, metadata, basic_vars = ['DYNAMICTOP_PTR', 'tempDoublePtr', 'ABORT'] if not (settings['WASM'] and settings['SIDE_MODULE']): basic_vars += ['STACKTOP', 'STACK_MAX'] - if metadata.get('preciseI64MathUsed'): - basic_vars += ['cttz_i8'] - else: - if forwarded_json['Functions']['libraryFunctions'].get('_llvm_cttz_i32'): - basic_vars += ['cttz_i8'] if settings['RELOCATABLE']: if not (settings['WASM'] and settings['SIDE_MODULE']): basic_vars += ['gb', 'fb'] diff --git a/src/library.js b/src/library.js index 09d835430e96b..f6d2d5698b9fd 100644 --- a/src/library.js +++ b/src/library.js @@ -1067,37 +1067,13 @@ LibraryManager.library = { return ret | 0; }, - llvm_cttz_i32__deps: [function() { - function cttz(x) { - for (var i = 0; i < 8; i++) { - if (x & (1 << i)) { - return i; - } - } - return 8; - } - if (SIDE_MODULE) return ''; // uses it from the parent - -#if USE_PTHREADS - return 'var cttz_i8; if (ENVIRONMENT_IS_PTHREAD) cttz_i8 = PthreadWorkerInit.cttz_i8; else PthreadWorkerInit.cttz_i8 = cttz_i8 = allocate([' + range(256).map(function(x) { return cttz(x) }).join(',') + '], "i8", ALLOC_STATIC);'; -#else - return 'var cttz_i8 = allocate([' + range(256).map(function(x) { return cttz(x) }).join(',') + '], "i8", ALLOC_STATIC);'; -#endif - }], #if WASM == 0 // binaryen will convert these calls to wasm anyhow llvm_cttz_i32__asm: true, #endif llvm_cttz_i32__sig: 'ii', llvm_cttz_i32: function(x) { - x = x|0; - var ret = 0; - ret = {{{ makeGetValueAsm('cttz_i8', 'x & 0xff', 'i8') }}}; - if ((ret|0) < 8) return ret|0; - ret = {{{ makeGetValueAsm('cttz_i8', '(x >> 8)&0xff', 'i8') }}}; - if ((ret|0) < 8) return (ret + 8)|0; - ret = {{{ makeGetValueAsm('cttz_i8', '(x >> 16)&0xff', 'i8') }}}; - if ((ret|0) < 8) return (ret + 16)|0; - return ({{{ makeGetValueAsm('cttz_i8', 'x >>> 24', 'i8') }}} + 24)|0; + x = x | 0; + return x ? (31 - (Math_clz32((x ^ (x - 1))) | 0) | 0) : 32; }, llvm_cttz_i64__deps: ['llvm_cttz_i32'], diff --git a/tests/core/test_llvm_intrinsics.cpp b/tests/core/test_llvm_intrinsics.cpp index 1871505343d96..e56e6529e348d 100644 --- a/tests/core/test_llvm_intrinsics.cpp +++ b/tests/core/test_llvm_intrinsics.cpp @@ -65,6 +65,16 @@ int main(void) { printf("%d,%d\n", (int)llvm_ctpop_i64((0x3101ULL << 32) | 1), llvm_ctpop_i32(0x3101)); + printf("llvm_cttz_i32:\n"); + printf("(0, 0)=%d\n", llvm_cttz_i32(0, 0)); + printf("(1, 0)=%d\n", llvm_cttz_i32(1, 0)); + printf("(2, 0)=%d\n", llvm_cttz_i32(2, 0)); + printf("(0x0000FFFF, 0)=%d\n", llvm_cttz_i32(0x0000FFFF, 0)); + printf("(0x7FFF0000, 0)=%d\n", llvm_cttz_i32(0x7FFF0000, 0)); + printf("(0xFFFF0000, 0)=%d\n", llvm_cttz_i32(0xFFFF0000, 0)); + printf("(0x7FFFFFFF, 0)=%d\n", llvm_cttz_i32(0x7FFFFFFF, 0)); + printf("(0xFFFFFFFE, 0)=%d\n", llvm_cttz_i32(0xFFFFFFFE, 0)); + printf("(0xFFFFFFFF, 0)=%d\n", llvm_cttz_i32(0xFFFFFFFF, 0)); printf("small ctlz: %d,%d\n", (int)llvm_ctlz_i8(2, 0), llvm_ctlz_i16(2, 0)); printf("llvm_ctpop_i32:\n"); diff --git a/tests/core/test_llvm_intrinsics.out b/tests/core/test_llvm_intrinsics.out index d5094b9a7e734..316cd68db2ea7 100644 --- a/tests/core/test_llvm_intrinsics.out +++ b/tests/core/test_llvm_intrinsics.out @@ -5,6 +5,16 @@ c5,de,15,8a 23,21 40,10 5,4 +llvm_cttz_i32: +(0, 0)=32 +(1, 0)=0 +(2, 0)=1 +(0x0000FFFF, 0)=0 +(0x7FFF0000, 0)=16 +(0xFFFF0000, 0)=16 +(0x7FFFFFFF, 0)=0 +(0xFFFFFFFE, 0)=1 +(0xFFFFFFFF, 0)=0 small ctlz: 6,14 llvm_ctpop_i32: 22 diff --git a/tests/test_other.py b/tests/test_other.py index b98e23cde2551..59211ae949048 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -7976,7 +7976,7 @@ def test(filename, expectations): 0, [], ['tempDoublePtr', 'waka'], 8, 0, 0), # totally empty! # but we don't metadce with linkable code! other modules may want it (['-O3', '-s', 'MAIN_MODULE=1'], - 1542, ['invoke_i'], ['waka'], 496958, 168, 2558), + 1541, ['invoke_i'], ['waka'], 496958, 168, 2558), ]) print('test on a minimal pure computational thing')