From 922deca148932f79eb4da5a96f1ba564538aedf8 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Mon, 26 Apr 2021 06:56:48 -0700 Subject: [PATCH] Enable LLD_REPORT_UNDEFINED by default This makes undefined symbol errors more precise by including the name of the object that references the undefined symbol. Its also paves the way (in my mind anyway) for finally fixing reverse dependencies in a salable way. See #15982. That PR uses an alternative script for the pre-processing of dependencies but also fundamentally relies on processing JS libraries both before and after linking. The cost is about 300ms per link operation due to double processing of the JS libraries. This cost is fixed for most projects (since most project don't add a lot JS libraries over time in the way that they add native code object). I imagine even in the most pathological cases JS libraries usage will be dwarfed by native object file usage so even in those cases the native linking will likely always dominate the link time. If the 300ms extra link time causes issues, for example with cmake or autoconf, that do a lot linking of small programs, we could consider hashing the config setting and caching the result of the processing based on them. --- ChangeLog.md | 5 ++++ emcc.py | 6 ++--- src/library.js | 10 ++++---- src/settings.js | 11 +++++---- test/common.py | 2 ++ test/test_core.py | 17 ++++++-------- test/test_other.py | 49 ++++++++++++++++++++-------------------- tools/gen_struct_info.py | 1 - 8 files changed, 52 insertions(+), 49 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index e21af0489463a..f263608549f72 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,11 @@ See docs/process.md for more on how version tagging works. 3.1.28 (in development) ----------------------- +- `LLD_REPORT_UNDEFINED` is now enabled by default. This makes undefined symbol + errors more precise by including the name of the object that references the + undefined symbol. The old behaviour (of allowing all undefined symbols at + wasm-ld time and reporting them later when processing JS library files) is + still available using `-sLLD_REPORT_UNDEFINED=0`. (#16003) - musl libc updated from v1.2.2 to v1.2.3. (#18270) - The default emscripten config file no longer contains `EMSCRIPTEN_ROOT`. This setting has long been completely ignored by emscripten itself. For diff --git a/emcc.py b/emcc.py index a65b95d74f2dc..7a6856dbb9d6f 100755 --- a/emcc.py +++ b/emcc.py @@ -1827,6 +1827,7 @@ def phase_linker_setup(options, state, newargs): if 'EXPORTED_FUNCTIONS' in user_settings: if '_main' not in settings.USER_EXPORTED_FUNCTIONS: settings.EXPECT_MAIN = 0 + settings.IGNORE_MISSING_MAIN = 1 else: assert not settings.EXPORTED_FUNCTIONS settings.EXPORTED_FUNCTIONS = ['_main'] @@ -1855,10 +1856,7 @@ def phase_linker_setup(options, state, newargs): if not settings.PURE_WASI and '-nostdlib' not in newargs and '-nodefaultlibs' not in newargs: default_setting('STACK_OVERFLOW_CHECK', max(settings.ASSERTIONS, settings.STACK_OVERFLOW_CHECK)) - if settings.LLD_REPORT_UNDEFINED or settings.STANDALONE_WASM: - # Reporting undefined symbols at wasm-ld time requires us to know if we have a `main` function - # or not, as does standalone wasm mode. - # TODO(sbc): Remove this once this becomes the default + if settings.STANDALONE_WASM: settings.IGNORE_MISSING_MAIN = 0 # For users that opt out of WARN_ON_UNDEFINED_SYMBOLS we assume they also diff --git a/src/library.js b/src/library.js index 9fc15c5bf45d0..a5e83540dbfe1 100644 --- a/src/library.js +++ b/src/library.js @@ -1258,17 +1258,19 @@ mergeInto(LibraryManager.library, { // built with SUPPORT_LONGJMP=1, the object file contains references of not // longjmp but _emscripten_throw_longjmp, which is called from // emscripten_longjmp. - _emscripten_throw_longjmp: function() { error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)'); }, get _emscripten_throw_longjmp__deps() { return this.longjmp__deps; }, #endif + _emscripten_throw_longjmp: function() { + error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)'); + }, // will never be emitted, as the dep errors at compile time longjmp: function(env, value) { - abort('longjmp not supported'); + abort('longjmp not supported (build with -s SUPPORT_LONGJMP)'); }, - setjmp: function(env, value) { - abort('setjmp not supported'); + setjmp: function(env) { + abort('setjmp not supported (build with -s SUPPORT_LONGJMP)'); }, #endif diff --git a/src/settings.js b/src/settings.js index 9ec1915fc2308..5aea30cd13437 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1917,12 +1917,13 @@ var USE_OFFSET_CONVERTER = false; // This is enabled automatically when using -g4 with sanitizers. var LOAD_SOURCE_MAP = false; -// If set to 1, the JS compiler is run before wasm-ld so that the linker can -// report undefined symbols within the binary. Without this option the linker -// doesn't know which symbols might be defined in JS so reporting of undefined -// symbols is delayed until the JS compiler is run. +// If set to 0, delay undefined symbol report until after wasm-ld runs. This +// avoids running the the JS compiler prior to wasm-ld, but reduces the amount +// of information in the undefined symbol message (Since JS compiler cannot +// report the name of the object file that contains the reference to the +// undefined symbol). // [link] -var LLD_REPORT_UNDEFINED = false; +var LLD_REPORT_UNDEFINED = true; // Default to c++ mode even when run as `emcc` rather then `emc++`. // When this is disabled `em++` is required when compiling and linking C++ diff --git a/test/common.py b/test/common.py index df8d9b70f7070..7dc0e10a28894 100644 --- a/test/common.py +++ b/test/common.py @@ -1043,6 +1043,8 @@ def expect_fail(self, cmd, expect_traceback=False, **args): self.assertContained('Traceback', proc.stderr) elif not WINDOWS or 'Access is denied' not in proc.stderr: self.assertNotContained('Traceback', proc.stderr) + if EMTEST_VERBOSE: + sys.stderr.write(proc.stderr) return proc.stderr # excercise dynamic linker. diff --git a/test/test_core.py b/test/test_core.py index 93b4a25e407ac..106be93773f58 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -4142,8 +4142,8 @@ def test_dylink_basics_no_modify(self): self.do_basic_dylink_test() @needs_dylink - def test_dylink_basics_lld_report_undefined(self): - self.set_setting('LLD_REPORT_UNDEFINED') + def test_dylink_basics_no_lld_report_undefined(self): + self.set_setting('LLD_REPORT_UNDEFINED', 0) self.do_basic_dylink_test() @needs_dylink @@ -5143,9 +5143,6 @@ def test_dylink_rtti(self): # in the another module. # Each module will define its own copy of certain COMDAT symbols such as # each classs's typeinfo, but at runtime they should both use the same one. - # Use LLD_REPORT_UNDEFINED to test that it works as expected with weak/COMDAT - # symbols. - self.set_setting('LLD_REPORT_UNDEFINED') header = ''' #include @@ -6154,7 +6151,6 @@ def test_unistd_io(self): 'nodefs': (['NODEFS']), }) def test_unistd_misc(self, fs): - self.set_setting('LLD_REPORT_UNDEFINED') self.emcc_args += ['-D' + fs] if fs == 'NODEFS': self.require_node() @@ -9407,9 +9403,8 @@ def test_undefined_main(self): # In standalone we don't support implicitly building without main. The user has to explicitly # opt out (see below). err = self.expect_fail([EMCC, test_file('core/test_ctors_no_main.cpp')] + self.get_emcc_args()) - self.assertContained('error: undefined symbol: main/__main_argc_argv (referenced by top-level compiled C/C++ code)', err) - self.assertContained('warning: To build in STANDALONE_WASM mode without a main(), use emcc --no-entry', err) - elif not self.get_setting('LLD_REPORT_UNDEFINED') and not self.get_setting('STRICT'): + self.assertContained('undefined symbol: main', err) + elif not self.get_setting('STRICT'): # Traditionally in emscripten we allow main to be implicitly undefined. This allows programs # with a main and libraries without a main to be compiled identically. # However we are trying to move away from that model to a more explicit opt-out model. See: @@ -9427,6 +9422,9 @@ def test_undefined_main(self): self.do_core_test('test_ctors_no_main.cpp') self.clear_setting('EXPORTED_FUNCTIONS') + # Marked as impure since the WASI reactor modules (modules without main) + # are not yet suppored by the wasm engines we test against. + @also_with_standalone_wasm(impure=True) def test_undefined_main_explict(self): # If we pass --no-entry this test should compile without issue self.emcc_args.append('--no-entry') @@ -9699,7 +9697,6 @@ def setUp(self): settings={'ALLOW_MEMORY_GROWTH': 1}) # Experimental modes (not tested by CI) -lld = make_run('lld', emcc_args=[], settings={'LLD_REPORT_UNDEFINED': 1}) minimal0 = make_run('minimal0', emcc_args=['-g'], settings={'MINIMAL_RUNTIME': 1}) # TestCoreBase is just a shape for the specific subclasses, we don't test it itself diff --git a/test/test_other.py b/test/test_other.py index 729d339e6e53e..4537c6a2bcc9a 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -2160,8 +2160,8 @@ def test_undefined_symbols(self, action): print(proc.stderr) if value or action is None: # The default is that we error in undefined symbols - self.assertContained('error: undefined symbol: something', proc.stderr) - self.assertContained('error: undefined symbol: elsey', proc.stderr) + self.assertContained('undefined symbol: something', proc.stderr) + self.assertContained('undefined symbol: elsey', proc.stderr) check_success = False elif action == 'ERROR' and not value: # Error disables, should only warn @@ -3541,7 +3541,7 @@ def test_js_lib_missing_sig(self): def test_js_lib_quoted_key(self): create_file('lib.js', r''' mergeInto(LibraryManager.library, { - __internal_data:{ + __internal_data:{ '<' : 0, 'white space' : 1 }, @@ -6584,7 +6584,7 @@ def test_no_warn_exported_jslibfunc(self): err = self.expect_fail([EMCC, test_file('hello_world.c'), '-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=alGetError', '-sEXPORTED_FUNCTIONS=_main,_alGet']) - self.assertContained('undefined exported symbol: "_alGet"', err) + self.assertContained('error: undefined exported symbol: "_alGet" [-Wundefined] [-Werror]', err) def test_musl_syscalls(self): self.run_process([EMCC, test_file('hello_world.c')]) @@ -8354,7 +8354,7 @@ def test_full_js_library(self): def test_full_js_library_undefined(self): create_file('main.c', 'void foo(); int main() { foo(); return 0; }') err = self.expect_fail([EMCC, 'main.c', '-sSTRICT_JS', '-sINCLUDE_FULL_LIBRARY']) - self.assertContained('error: undefined symbol: foo', err) + self.assertContained('undefined symbol: foo', err) def test_full_js_library_except(self): self.set_setting('INCLUDE_FULL_LIBRARY', 1) @@ -9010,19 +9010,20 @@ def test_js_preprocess(self): err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js'], stderr=PIPE).stderr self.assertContained('JSLIB: none of the above', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: MAIN_MODULE', err) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE'], stderr=PIPE).stderr self.assertContained('JSLIB: MAIN_MODULE=1', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE=2'], stderr=PIPE).stderr self.assertContained('JSLIB: MAIN_MODULE=2', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sEXIT_RUNTIME'], stderr=PIPE).stderr self.assertContained('JSLIB: EXIT_RUNTIME', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: MAIN_MODULE', err) def test_html_preprocess(self): src_file = test_file('module/test_stdin.c') @@ -9195,7 +9196,7 @@ def test_dash_s_list_parsing(self): # stray slash ('EXPORTED_FUNCTIONS=["_a", "_b",\\ "_c", "_d"]', 'undefined exported symbol: "\\\\ "_c"'), # missing comma - ('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'undefined exported symbol: "_b" "_c"'), + ('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'emcc: error: undefined exported symbol: "_b" "_c" [-Wundefined] [-Werror]'), ]: print(export_arg) proc = self.run_process([EMCC, 'src.c', '-s', export_arg], stdout=PIPE, stderr=PIPE, check=not expected) @@ -10880,20 +10881,20 @@ def test_signature_mismatch(self): self.expect_fail([EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c']) self.expect_fail([EMCC, '-sSTRICT', 'a.c', 'b.c']) + # TODO(sbc): Remove these tests once we remove the LLD_REPORT_UNDEFINED def test_lld_report_undefined(self): create_file('main.c', 'void foo(); int main() { foo(); return 0; }') - stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED', 'main.c']) - self.assertContained('wasm-ld: error:', stderr) - self.assertContained('main_0.o: undefined symbol: foo', stderr) + stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED=0', 'main.c']) + self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', stderr) def test_lld_report_undefined_reverse_deps(self): - self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sREVERSE_DEPS=all', test_file('hello_world.c')]) + self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sREVERSE_DEPS=all', test_file('hello_world.c')]) def test_lld_report_undefined_exceptions(self): - self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED', '-fwasm-exceptions', test_file('hello_libcxx.cpp')]) + self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED=0', '-fwasm-exceptions', test_file('hello_libcxx.cpp')]) def test_lld_report_undefined_main_module(self): - self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sMAIN_MODULE=2', test_file('hello_world.c')]) + self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sMAIN_MODULE=2', test_file('hello_world.c')]) # Verifies that warning messages that Closure outputs are recorded to console def test_closure_warnings(self): @@ -11031,14 +11032,12 @@ def test_linker_version(self): def test_chained_js_error_diagnostics(self): err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c'), '--js-library', test_file('test_chained_js_error_diagnostics.js')]) self.assertContained("error: undefined symbol: nonexistent_function (referenced by bar__deps: ['nonexistent_function'], referenced by foo__deps: ['bar'], referenced by top-level compiled C/C++ code)", err) - # Check that we don't recommend LLD_REPORT_UNDEFINED for chained dependencies. - self.assertNotContained('LLD_REPORT_UNDEFINED', err) - # Test without chaining. In this case we don't include the JS library at all resulting in `foo` - # being undefined in the native code and in this case we recommend LLD_REPORT_UNDEFINED. + # Test without chaining. In this case we don't include the JS library at + # all resulting in `foo` being undefined in the native code. err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c')]) - self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', err) - self.assertContained('Link with `-sLLD_REPORT_UNDEFINED` to get more information on undefined symbols', err) + self.assertContained('undefined symbol: foo', err) + self.assertNotContained('referenced by top-level compiled C/C++ code', err) def test_xclang_flag(self): create_file('foo.h', ' ') @@ -11476,7 +11475,7 @@ def test_split_main_module(self): self.run_process([EMCC, side_src, '-sSIDE_MODULE', '-g', '-o', 'libhello.wasm']) - self.emcc_args += ['-g'] + self.emcc_args += ['-g', 'libhello.wasm'] self.emcc_args += ['-sMAIN_MODULE=2'] self.emcc_args += ['-sEXPORTED_FUNCTIONS=_printf'] self.emcc_args += ['-sSPLIT_MODULE', '-Wno-experimental'] @@ -11840,7 +11839,7 @@ def test_no_main_with_PROXY_TO_PTHREAD(self): void foo() {} ''') err = self.expect_fail([EMCC, 'lib.cpp', '-pthread', '-sPROXY_TO_PTHREAD']) - self.assertContained('error: PROXY_TO_PTHREAD proxies main() for you, but no main exists', err) + self.assertContained('crt1_proxy_main.o: undefined symbol: main', err) def test_archive_bad_extension(self): # Regression test for https://github.com/emscripten-core/emscripten/issues/14012 @@ -11882,7 +11881,7 @@ def test_unimplemented_syscalls(self, args): cmd = [EMCC, 'main.c', '-sASSERTIONS'] + args if args: err = self.expect_fail(cmd) - self.assertContained('error: undefined symbol: __syscall_mincore', err) + self.assertContained('undefined symbol: __syscall_mincore', err) else: self.run_process(cmd) err = self.run_js('a.out.js') diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py index 2062db5b6ab3c..12d1da198cb94 100755 --- a/tools/gen_struct_info.py +++ b/tools/gen_struct_info.py @@ -269,7 +269,6 @@ def inspect_headers(headers, cflags): '-nostdlib', compiler_rt, '-sBOOTSTRAPPING_STRUCT_INFO', - '-sLLD_REPORT_UNDEFINED', '-sSTRICT', '-sASSERTIONS=0', # Use SINGLE_FILE so there is only a single