From ed29f033856b049e881e4a675f22567db4f86cb8 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Mon, 5 Dec 2022 19:18:48 -0800 Subject: [PATCH] Cache symbol lists used by LLD_REPORT_UNDEFINED. NFC This means that the JS libraries only need to be processed when there is a cache miss. The cost of processing the JS libraries is about 300ms on my machine which is about 30% of the link time for hello world. When there is a cache hit this cost is reduced to 3ms. This change is in preparation for switching this mode on by default. See: #16003 --- emcc.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/emcc.py b/emcc.py index b7243ab3df187..af6ef607dc3b1 100755 --- a/emcc.py +++ b/emcc.py @@ -23,6 +23,8 @@ from tools.toolchain_profiler import ToolchainProfiler import base64 +import glob +import hashlib import json import logging import os @@ -39,7 +41,7 @@ import emscripten -from tools import shared, system_libs, utils, ports +from tools import shared, system_libs, utils, ports, filelock from tools import colored_logger, diagnostics, building from tools.shared import unsuffixed, unsuffixed_basename, WINDOWS, safe_copy from tools.shared import run_process, read_and_preprocess, exit_with_error, DEBUG @@ -500,8 +502,7 @@ def ensure_archive_index(archive_file): run_process([shared.LLVM_RANLIB, archive_file]) -@ToolchainProfiler.profile_block('JS symbol generation') -def get_all_js_syms(): +def generate_js_symbols(): # Runs the js compiler to generate a list of all symbols available in the JS # libraries. This must be done separately for each linker invokation since the # list of symbols depends on what settings are used. 
@@ -516,6 +517,56 @@ def get_all_js_syms(): if shared.is_c_symbol(name): name = shared.demangle_c_symbol_name(name) library_syms.add(name) + return library_syms + + +@ToolchainProfiler.profile_block('JS symbol generation') +def get_all_js_syms(): + # Avoid using the cache when generating struct info since + # this step is performed while the cache is locked. + if settings.BOOTSTRAPPING_STRUCT_INFO or config.FROZEN_CACHE: + return generate_js_symbols() + + # We define a cache hit as when the settings and `--js-library` contents are + # identical. + input_files = [json.dumps(settings.dict(), sort_keys=True, indent=2)] + for jslib in sorted(glob.glob(utils.path_from_root('src') + '/library*.js')): + input_files.append(read_file(jslib)) + for jslib in settings.JS_LIBRARIES: + if not os.path.isabs(jslib): + jslib = utils.path_from_root('src', jslib) + input_files.append(read_file(jslib)) + content = '\n'.join(input_files) + content_hash = hashlib.sha1(content.encode('utf-8')).hexdigest() + + def build_symbol_list(filename): + """Only called when there is no existing symbol list for a given content hash. + """ + library_syms = generate_js_symbols() + write_file(filename, '\n'.join(library_syms) + '\n') + + # We need to use a separate lock here for symbol lists because, unlike with system libraries, + # it's normal for these files to get pruned as part of normal operation. This means that a file + # can be deleted between the `cache.get()` and the `read_file`. + with filelock.FileLock(cache.get_path(cache.get_path('symbol_lists.lock'))): + filename = cache.get(f'symbol_lists/{content_hash}.txt', build_symbol_list) + library_syms = read_file(filename).splitlines() + + # Limit the overall size of the cache to 100 files. + # This code will get test coverage once we make LLD_REPORT_UNDEFINED the default + # since under those circumstances a full test run of `other` or `core` generates + # ~1000 unique symbol lists. 
+ cache_limit = 100 + root = cache.get_path('symbol_lists') + if len(os.listdir(root)) > cache_limit: + files = [] + for f in os.listdir(root): + f = os.path.join(root, f) + files.append((f, os.path.getmtime(f))) + files.sort(key=lambda x: x[1]) + # Delete all but the newest N files + for f, _ in files[:-cache_limit]: + delete_file(f) return library_syms