From 34864baf6e507af940625187b52f2606658236f3 Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Mon, 5 Dec 2022 19:18:48 -0800
Subject: [PATCH] Cache symbol lists used by LLD_REPORT_UNDEFINED. NFC

This means that the JS libraries only need to be processed when there
is a cache miss. The cost of processing the JS libraries is about 300ms
on my machine, which is about 30% of the link time for hello world.
When there is a cache hit this cost is reduced to 3ms.

This change is in preparation for switching this mode on by default.

See: #16003
---
Review notes (this section is not part of the commit message):
- Relative JS_LIBRARIES entries are now detected with
  `not os.path.isabs(jslib)`. `os.path.abspath()` returns a non-empty
  string for every input, so the previous test was always true and every
  entry was passed through `utils.path_from_root('src', ...)`.
- A cache hit now returns a set, matching what is returned on a cache miss.
- `binascii` is imported but not used by any line in this patch;
  NOTE(review): confirm it is needed.
- Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; TODO confirm the context lines
  against emcc.py before applying.

 emcc.py | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/emcc.py b/emcc.py
index 60ea9c1f91762..d15ed6eeb843f 100755
--- a/emcc.py
+++ b/emcc.py
@@ -23,6 +23,7 @@
 from tools.toolchain_profiler import ToolchainProfiler
 
 import base64
+import binascii
 import json
 import logging
 import os
@@ -500,8 +501,7 @@ def ensure_archive_index(archive_file):
   run_process([shared.LLVM_RANLIB, archive_file])
 
 
-@ToolchainProfiler.profile_block('JS symbol generation')
-def get_all_js_syms():
+def generate_js_symbols():
   # Runs the js compiler to generate a list of all symbols available in the JS
   # libraries. This must be done separately for each linker invokation since the
   # list of symbols depends on what settings are used.
@@ -516,6 +516,44 @@ def get_all_js_syms():
     if shared.is_c_symbol(name):
       name = shared.demangle_c_symbol_name(name)
       library_syms.add(name)
+  return library_syms
+
+
+@ToolchainProfiler.profile_block('JS symbol generation')
+def get_all_js_syms():
+  # To avoid the cost of calling generate_js_symbols each time an executable is
+  # linked we cache symbol lists for the N most recently used configs.
+  # We define a cache hit as when the settings and `--js-library` contents are
+  # identical.
+  input_data = json.dumps(settings.dict(), sort_keys=True, indent=2) + '\n'
+  for jslib in settings.JS_LIBRARIES:
+    if not os.path.isabs(jslib):
+      jslib = utils.path_from_root('src', jslib)
+    input_data += read_file(jslib)
+  cache_filename = None
+  num_cache_entries = 20
+
+  with cache.lock('js_symbol_lists'):
+    oldest_timestamp = 0
+    for i in range(num_cache_entries):
+      input_file = cache.get_path(f'js_symbol_list_{i}.inputs')
+      list_file = cache.get_path(f'js_symbol_list_{i}.txt')
+      if not os.path.exists(input_file) or not os.path.exists(list_file):
+        cache_filename = list_file
+        break
+      timestamp = os.path.getmtime(input_file)
+      if timestamp < oldest_timestamp or not oldest_timestamp:
+        oldest_timestamp = timestamp
+        cache_filename = list_file
+      if read_file(input_file) == input_data:
+        # Cache hit, read the symbol list from the list_file
+        return set(read_file(list_file).splitlines())
+
+    # Cache miss. Generate a new symbol list and write to the cache
+    library_syms = generate_js_symbols()
+
+    write_file(cache_filename, '\n'.join(library_syms) + '\n')
+    write_file(shared.replace_suffix(cache_filename, '.inputs'), input_data)
   return library_syms
 
 