Skip to content

Commit

Permalink
Cache symbol lists used by LLD_REPORT_UNDEFINED. NFC
Browse files Browse the repository at this point in the history
This means that the JS libraries only need to be processed when
there is a cache miss.  The cost of processing the JS libraries is about
300ms on my machine, which is about 30% of the link time for hello
world.  When there is a cache hit this cost is reduced to 3ms.

This change is in preparation for switching this mode on by default.

See: #16003
  • Loading branch information
sbc100 committed Dec 6, 2022
1 parent 58a52ab commit 34864ba
Showing 1 changed file with 40 additions and 2 deletions.
42 changes: 40 additions & 2 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from tools.toolchain_profiler import ToolchainProfiler

import base64
import binascii
import json
import logging
import os
Expand Down Expand Up @@ -500,8 +501,7 @@ def ensure_archive_index(archive_file):
run_process([shared.LLVM_RANLIB, archive_file])


@ToolchainProfiler.profile_block('JS symbol generation')
def get_all_js_syms():
def generate_js_symbols():
# Runs the js compiler to generate a list of all symbols available in the JS
# libraries. This must be done separately for each linker invokation since the
# list of symbols depends on what settings are used.
Expand All @@ -516,6 +516,44 @@ def get_all_js_syms():
if shared.is_c_symbol(name):
name = shared.demangle_c_symbol_name(name)
library_syms.add(name)
return library_syms


@ToolchainProfiler.profile_block('JS symbol generation')
def get_all_js_syms():
  """Return the set of symbols available in the JS libraries, using a cache.

  The cache key is the JSON dump of the current settings followed by the
  contents of every file listed in `settings.JS_LIBRARIES`.  Up to
  `num_cache_entries` (key, symbol list) pairs are stored in the cache
  directory as `js_symbol_list_<i>.inputs` / `js_symbol_list_<i>.txt`.

  On a cache hit the stored symbol list is read back and returned as a set.
  On a cache miss `generate_js_symbols()` is called and its result is written
  to the first unused slot, or, when all slots are in use, to the slot whose
  `.inputs` file has the oldest modification time.  Note that the modification
  time is not refreshed on a cache hit, so eviction is ordered by when an
  entry was written rather than when it was last read.
  """
  # To avoid the cost of calling generate_js_symbols each time an executable is
  # linked we cache symbol lists for a fixed number of configs.
  # We define a cache hit as when the settings and `--js-library` contents are
  # identical.
  input_data = json.dumps(settings.dict(), sort_keys=True, indent=2) + '\n'
  for jslib in settings.JS_LIBRARIES:
    # Relative library names are resolved against the `src` directory;
    # absolute paths are used as-is.
    if not os.path.isabs(jslib):
      jslib = utils.path_from_root('src', jslib)
    input_data += read_file(jslib)
  cache_filename = None
  num_cache_entries = 20

  with cache.lock('js_symbol_lists'):
    # 0 doubles as the "no timestamp seen yet" sentinel.
    oldest_timestamp = 0
    for i in range(num_cache_entries):
      input_file = cache.get_path(f'js_symbol_list_{i}.inputs')
      list_file = cache.get_path(f'js_symbol_list_{i}.txt')
      if not os.path.exists(input_file) or not os.path.exists(list_file):
        # Unused (or incomplete) slot: claim it for the new entry.
        cache_filename = list_file
        break
      # Track the oldest slot seen so far as the eviction candidate.
      timestamp = os.path.getmtime(input_file)
      if timestamp < oldest_timestamp or not oldest_timestamp:
        oldest_timestamp = timestamp
        cache_filename = list_file
      if read_file(input_file) == input_data:
        # Cache hit, read the symbol list from the list_file.  Return a set so
        # that the result supports the same operations as the cache-miss path
        # (generate_js_symbols builds its result with `.add`).
        return set(read_file(list_file).splitlines())

    # Cache miss. Generate a new symbol list and write to the cache
    library_syms = generate_js_symbols()

    write_file(cache_filename, '\n'.join(library_syms) + '\n')
    write_file(shared.replace_suffix(cache_filename, '.inputs'), input_data)

  return library_syms

Expand Down

0 comments on commit 34864ba

Please sign in to comment.