diff --git a/emcc.py b/emcc.py
index fd26a57743c8b..7c15ad599ef64 100755
--- a/emcc.py
+++ b/emcc.py
@@ -23,6 +23,8 @@
 from tools.toolchain_profiler import ToolchainProfiler
 
 import base64
+import glob
+import hashlib
 import json
 import logging
 import os
@@ -500,8 +502,7 @@ def ensure_archive_index(archive_file):
   run_process([shared.LLVM_RANLIB, archive_file])
 
 
-@ToolchainProfiler.profile_block('JS symbol generation')
-def get_all_js_syms():
+def generate_js_symbols():
   # Runs the js compiler to generate a list of all symbols available in the JS
   # libraries. This must be done separately for each linker invokation since the
   # list of symbols depends on what settings are used.
@@ -516,6 +517,69 @@ def get_all_js_syms():
       if shared.is_c_symbol(name):
         name = shared.demangle_c_symbol_name(name)
       library_syms.add(name)
+  return library_syms
+
+
+@ToolchainProfiler.profile_block('JS symbol generation')
+def get_all_js_syms():
+  # Returns the set of symbols defined by the JS libraries for the current
+  # settings, using an on-disk cache of recent results where possible.
+  #
+  # Avoid using the cache when generating struct info since
+  # this step is performed while the cache is locked.
+  if settings.BOOTSTRAPPING_STRUCT_INFO or config.FROZEN_CACHE:
+    return generate_js_symbols()
+
+  # To avoid the cost of calling generate_js_symbols each time an executable is
+  # linked we cache symbol lists for the N most recently used configs.
+  # We define a cache hit as when the settings and `--js-library` contents are
+  # identical.
+  input_files = {}
+  input_files['settings.json'] = json.dumps(settings.dict(), sort_keys=True, indent=2)
+  for jslib in sorted(glob.glob(utils.path_from_root('src') + '/library*.js')):
+    input_files[jslib] = read_file(jslib)
+  for jslib in settings.JS_LIBRARIES:
+    # Relative library names are resolved against the `src` directory;
+    # absolute paths are used as-is.
+    if not os.path.isabs(jslib):
+      jslib = utils.path_from_root('src', jslib)
+    input_files[jslib] = read_file(jslib)
+  input_data = []
+  for name, content in input_files.items():
+    content_hash = hashlib.sha1(content.encode('utf-8')).hexdigest()
+    input_data.append(f'{name}: {content_hash}')
+
+  input_data = '\n'.join(input_data) + '\n'
+  cache_filename = None
+  num_cache_entries = 20
+
+  with cache.lock('js_symbol_lists'):
+    oldest_timestamp = 0
+    for i in range(num_cache_entries):
+      input_file = cache.get_path(f'js_symbol_list_{i}.inputs')
+      list_file = cache.get_path(f'js_symbol_list_{i}.txt')
+      if not os.path.exists(input_file) or not os.path.exists(list_file):
+        # Unused slot: claim it for the new entry.
+        cache_filename = list_file
+        break
+      # Track the least recently used slot as the eviction candidate.
+      timestamp = os.path.getmtime(input_file)
+      if timestamp < oldest_timestamp or not oldest_timestamp:
+        oldest_timestamp = timestamp
+        cache_filename = list_file
+      if read_file(input_file) == input_data:
+        # Cache hit. Refresh the timestamp so that eviction is based on last
+        # use, then read the symbol list from the list_file. Return a set so
+        # that hits and misses yield the same type.
+        os.utime(input_file)
+        return set(read_file(list_file).splitlines())
+
+    # Cache miss. Generate a new symbol list and write it to the cache.
+    library_syms = generate_js_symbols()
+
+    # Sort so that the cache file contents are deterministic.
+    write_file(cache_filename, '\n'.join(sorted(library_syms)) + '\n')
+    write_file(shared.replace_suffix(cache_filename, '.inputs'), input_data)
 
   return library_syms
 