From 169084898a9bdeac983f4b53bf5a217ab3ba42dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damian=20Kr=C3=B3lik?= <66667989+Damian-Nordic@users.noreply.github.com> Date: Tue, 30 Jul 2024 20:10:51 +0200 Subject: [PATCH] [Zephyr] Fix build issues with LTO (#34633) * [Zephyr] Fix sys_heap malloc build with LTO Fix the build of Matter samples with sys_heap malloc enabled. The linker errors would occur because: 1. The realloc() function is not used outside the LTO- optimized code. 2. Thus, the LTO linker plugin would mark __wrap_realloc symbol as PREVAILING_DEF_IRONLY, which would enable the compiler to optimize away the function. 3. As a result, undefined references to realloc() could not be resolved by the linker. Mark malloc/calloc/realloc/free as externally visible. Signed-off-by: Damian Krolik * [nrfconnect] Do not link libCHIPShell.a with --whole-archive libCHIPShell.a is already part of libCHIP.a but the former had to be linked additionally with --whole-archive flag to process the shell and init objects defined with SHELL_XXX and SYS_INIT Zephyr macros and, in turn, register Matter shell commands properly. This symbol duplication between the two libraries causes issues when building Matter with LTO, so replace the current approach with explicitly pulling in one symbol from MainLoopZephyr.cpp file. Signed-off-by: Damian Krolik * [nrfconnect] Build all clusters app for nRF7002 with LTO The flash usage on this app/platform is close to the limit. Enable LTO to prevent from blocking the incoming PRs. Signed-off-by: Damian Krolik --------- Signed-off-by: Damian Krolik --- config/nrfconnect/chip-module/CMakeLists.txt | 11 +++++- .../nrfconnect/prj_release.conf | 4 +++ src/lib/shell/MainLoopZephyr.cpp | 4 +-- src/platform/Zephyr/SysHeapMalloc.cpp | 35 +++++++++++-------- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/config/nrfconnect/chip-module/CMakeLists.txt b/config/nrfconnect/chip-module/CMakeLists.txt index bccd16b1416160..20c5c692b68633 100644 --- a/config/nrfconnect/chip-module/CMakeLists.txt +++ b/config/nrfconnect/chip-module/CMakeLists.txt @@ -200,7 +200,6 @@ matter_generate_args_tmp_file() # ============================================================================== matter_build(chip - LIB_SHELL ${CONFIG_CHIP_LIB_SHELL} LIB_TESTS ${CONFIG_CHIP_BUILD_TESTS} DEVICE_INFO_EXAMPLE_PROVIDER ${CONFIG_CHIP_EXAMPLE_DEVICE_INFO_PROVIDER} GN_DEPENDENCIES kernel @@ -225,6 +224,16 @@ if (CONFIG_CHIP_MALLOC_SYS_HEAP_OVERRIDE) ) endif() +if (CONFIG_CHIP_LIB_SHELL) + # Force pulling chip::Shell::Engine::RunMainLoop() in the final binary. + # Without this workaround, the linker script does not process the shell and + # init objects defined in MainLoopZephyr.cpp unless the Matter library or + # the Matter shell library is linked using the '--whole-archive' flag. + target_link_options(chip INTERFACE + -Wl,-u,_ZN4chip5Shell6Engine11RunMainLoopEv + ) +endif() + # ============================================================================== # Define 'chip-ota-image' target for building CHIP OTA image # ============================================================================== diff --git a/examples/all-clusters-app/nrfconnect/prj_release.conf b/examples/all-clusters-app/nrfconnect/prj_release.conf index b98004283b64b9..18685a7488fce5 100644 --- a/examples/all-clusters-app/nrfconnect/prj_release.conf +++ b/examples/all-clusters-app/nrfconnect/prj_release.conf @@ -62,3 +62,7 @@ CONFIG_CHIP_FACTORY_DATA_BUILD=y # Enable the Read Client for binding purposes CONFIG_CHIP_ENABLE_READ_CLIENT=y + +# Enable LTO to reduce the flash usage +CONFIG_LTO=y +CONFIG_ISR_TABLES_LOCAL_DECLARATION=y diff --git a/src/lib/shell/MainLoopZephyr.cpp b/src/lib/shell/MainLoopZephyr.cpp index 84fca4ef9d29af..108ed2e4385ad7 100644 --- a/src/lib/shell/MainLoopZephyr.cpp +++ b/src/lib/shell/MainLoopZephyr.cpp @@ -91,7 +91,7 @@ int ExecCommandInShellThread(const struct shell * shell, size_t argc, char ** ar return error == CHIP_NO_ERROR ? 0 : -ENOEXEC; } -int RegisterCommands() +int RegisterMatterCommands() { Shell::Engine::Root().RegisterDefaultCommands(); return 0; @@ -99,7 +99,7 @@ int RegisterCommands() } // namespace -SYS_INIT(RegisterCommands, POST_KERNEL, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); +SYS_INIT(RegisterMatterCommands, POST_KERNEL, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); SHELL_CMD_ARG_REGISTER(matter, NULL, "Matter commands", ExecCommandInShellThread, 1, CHIP_SHELL_MAX_TOKENS); namespace chip { diff --git a/src/platform/Zephyr/SysHeapMalloc.cpp b/src/platform/Zephyr/SysHeapMalloc.cpp index d4f65b05459615..fbd7ba09fc2493 100644 --- a/src/platform/Zephyr/SysHeapMalloc.cpp +++ b/src/platform/Zephyr/SysHeapMalloc.cpp @@ -30,9 +30,6 @@ extern "C" { #include #include -// Construct name of the given function wrapped with the `--wrap=symbol` GCC option. -#define WRAP(f) __wrap_##f - using namespace chip; namespace { @@ -67,7 +64,7 @@ LockGuard::~LockGuard() } } -int initHeap() +int InitSysHeapMalloc() { sys_heap_init(&sHeap, sHeapMemory, sizeof(sHeapMemory)); return 0; @@ -77,7 +74,7 @@ int initHeap() // Initialize the heap in the POST_KERNEL phase to make sure that it is ready even before // C++ static constructors are called (which happens prior to the APPLICATION initialization phase). -SYS_INIT(initHeap, POST_KERNEL, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); +SYS_INIT(InitSysHeapMalloc, POST_KERNEL, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); namespace chip { namespace DeviceLayer { @@ -99,7 +96,7 @@ void * Calloc(size_t num, size_t size) return nullptr; } - void * mem = malloc(totalSize); + void * mem = Malloc(totalSize); if (mem) { @@ -156,27 +153,37 @@ void ResetMaxStats() extern "C" { -void * WRAP(malloc)(size_t size) __attribute((alias("_ZN4chip11DeviceLayer6Malloc6MallocEj"))); -void * WRAP(calloc)(size_t num, size_t size) __attribute((alias("_ZN4chip11DeviceLayer6Malloc6CallocEjj"))); -void * WRAP(realloc)(void * mem, size_t size) __attribute((alias("_ZN4chip11DeviceLayer6Malloc7ReallocEPvj"))); -void WRAP(free)(void * mem) __attribute((alias("_ZN4chip11DeviceLayer6Malloc4FreeEPv"))); +// Construct the name of a function wrapped with the `--wrap=symbol` GCC option. +#define WRAP(f) __wrap_##f + +// Define a function as an alias of another function. +#define ALIAS(f) __attribute((alias(f))) + +// Mark a function as externally visible so that it is not optimized-away even +// if LTO or whole-program optimization is enabled. +#define EXTERNALLY_VISIBLE __attribute((externally_visible)) + +EXTERNALLY_VISIBLE void * WRAP(malloc)(size_t size) ALIAS("_ZN4chip11DeviceLayer6Malloc6MallocEj"); +EXTERNALLY_VISIBLE void * WRAP(calloc)(size_t num, size_t size) ALIAS("_ZN4chip11DeviceLayer6Malloc6CallocEjj"); +EXTERNALLY_VISIBLE void * WRAP(realloc)(void * mem, size_t size) ALIAS("_ZN4chip11DeviceLayer6Malloc7ReallocEPvj"); +EXTERNALLY_VISIBLE void WRAP(free)(void * mem) ALIAS("_ZN4chip11DeviceLayer6Malloc4FreeEPv"); -void * WRAP(_malloc_r)(_reent *, size_t size) +EXTERNALLY_VISIBLE void * WRAP(_malloc_r)(_reent *, size_t size) { return WRAP(malloc)(size); } -void * WRAP(_calloc_r)(_reent *, size_t num, size_t size) +EXTERNALLY_VISIBLE void * WRAP(_calloc_r)(_reent *, size_t num, size_t size) { return WRAP(calloc)(num, size); } -void * WRAP(_realloc_r)(_reent *, void * mem, size_t size) +EXTERNALLY_VISIBLE void * WRAP(_realloc_r)(_reent *, void * mem, size_t size) { return WRAP(realloc)(mem, size); } -void WRAP(_free_r)(_reent *, void * mem) +EXTERNALLY_VISIBLE void WRAP(_free_r)(_reent *, void * mem) { WRAP(free)(mem); }