From 650806333e6c595b1ba9907a5529cfc8917e2121 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 31 Aug 2023 12:10:19 -0500 Subject: [PATCH] [libc] Support 'assert.h' on the GPU This patch adds the necessary support to provide `assert` functionality through the GPU `libc` implementation. This implementation creates a special-case GPU implementation rather than relying on the common version. This is because the GPU has special considerations for printing. The assertion is printed out in chunks with `write_to_stderr`, however when combined with the GPU execution model this causes 32+ threads to all execute in lock-step, meaning that we'll get a horribly fragmented message. Furthermore, potentially thousands of threads could hit the assertion at once and try to print even if we had it all in one `printf`. This is solved by having a one-time lock that each thread group / wave / warp will attempt to claim. We only let one thread group pass through while the others simply stop executing. Finally, only the first thread in that group will do the printing before we abort execution. 
Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D159296 commit-id:a8e5550c --- libc/config/gpu/api.td | 32 +++++++++++++ libc/config/gpu/entrypoints.txt | 3 ++ libc/config/gpu/headers.txt | 1 + libc/docs/gpu/support.rst | 12 ++++- libc/src/__support/GPU/utils.h | 15 +++++++ libc/src/__support/RPC/rpc.h | 4 +- libc/src/__support/RPC/rpc_util.h | 10 ----- libc/src/assert/CMakeLists.txt | 22 +++++---- libc/src/assert/generic/CMakeLists.txt | 12 +++++ .../assert/{ => generic}/__assert_fail.cpp | 0 libc/src/assert/gpu/CMakeLists.txt | 14 ++++++ libc/src/assert/gpu/__assert_fail.cpp | 45 +++++++++++++++++++ 12 files changed, 149 insertions(+), 21 deletions(-) create mode 100644 libc/src/assert/generic/CMakeLists.txt rename libc/src/assert/{ => generic}/__assert_fail.cpp (100%) create mode 100644 libc/src/assert/gpu/CMakeLists.txt create mode 100644 libc/src/assert/gpu/__assert_fail.cpp diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index 4435ded2aa45e4..f8db0742ac8292 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -3,6 +3,38 @@ include "config/public_api.td" include "spec/stdc.td" include "spec/posix.td" include "spec/gpu_ext.td" +include "spec/gnu_ext.td" +include "spec/llvm_libc_ext.td" + +def AssertMacro : MacroDef<"assert"> { + let Defn = [{ + #undef assert + + #ifdef NDEBUG + #define assert(e) (void)0 + #else + + #define assert(e) \ + ((e) ? 
(void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__)) + #endif + }]; +} + +def StaticAssertMacro : MacroDef<"static_assert"> { + let Defn = [{ + #ifndef __cplusplus + #undef static_assert + #define static_assert _Static_assert + #endif + }]; +} + +def AssertAPI : PublicAPI<"assert.h"> { + let Macros = [ + AssertMacro, + StaticAssertMacro, + ]; +} def StringAPI : PublicAPI<"string.h"> { let Types = ["size_t"]; diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 1d3047ab3c146b..bf59ba6b0a3eaa 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -1,4 +1,7 @@ set(TARGET_LIBC_ENTRYPOINTS + # assert.h entrypoints + libc.src.assert.__assert_fail + # ctype.h entrypoints libc.src.ctype.isalnum libc.src.ctype.isalpha diff --git a/libc/config/gpu/headers.txt b/libc/config/gpu/headers.txt index 3a8f091cda413a..dae01310fe9c31 100644 --- a/libc/config/gpu/headers.txt +++ b/libc/config/gpu/headers.txt @@ -1,4 +1,5 @@ set(TARGET_PUBLIC_HEADERS + libc.include.assert libc.include.ctype libc.include.string libc.include.inttypes diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index d1c27c7e8032d7..fbe69c66ca53b4 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -130,7 +130,7 @@ fopen |check| |check| fread |check| |check| ============= ========= ============ -stdio.h +time.h -------- ============= ========= ============ @@ -139,3 +139,13 @@ Function Name Available RPC Required clock |check| nanosleep |check| ============= ========= ============ + +assert.h +-------- + +============= ========= ============ +Function Name Available RPC Required +============= ========= ============ +assert |check| |check| +__assert_fail |check| |check| +============= ========= ============ diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h index f3277f42a32d20..07de9d72fbe62c 100644 --- a/libc/src/__support/GPU/utils.h +++ b/libc/src/__support/GPU/utils.h 
@@ -19,4 +19,19 @@ #include "generic/utils.h" #endif +namespace __llvm_libc { +namespace gpu { +/// Get the first active thread inside the lane. +LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) { + return __builtin_ffsl(lane_mask) - 1; +} + +/// Conditional that is only true for a single thread in a lane. +LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { + return gpu::get_lane_id() == get_first_lane_id(lane_mask); +} + +} // namespace gpu +} // namespace __llvm_libc + #endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_IO_H diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index 49336fbc0332d3..fc95e5edf1c720 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -214,7 +214,7 @@ template struct Process { // restrict to a single thread to avoid one thread dropping the lock, then // an unrelated warp claiming the lock, then a second thread in this warp // dropping the lock again. - clear_nth(lock, index, rpc::is_first_lane(lane_mask)); + clear_nth(lock, index, gpu::is_first_lane(lane_mask)); gpu::sync_lane(lane_mask); } @@ -546,7 +546,7 @@ template LIBC_INLINE Client::Port Client::open() { continue; } - if (is_first_lane(lane_mask)) { + if (gpu::is_first_lane(lane_mask)) { process.packet[index].header.opcode = opcode; process.packet[index].header.mask = lane_mask; } diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h index 1e2c53880cb747..8f500369541de9 100644 --- a/libc/src/__support/RPC/rpc_util.h +++ b/libc/src/__support/RPC/rpc_util.h @@ -30,16 +30,6 @@ LIBC_INLINE void sleep_briefly() { #endif } -/// Get the first active thread inside the lane. -LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) { - return __builtin_ffsl(lane_mask) - 1; -} - -/// Conditional that is only true for a single thread in a lane. 
-LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { - return gpu::get_lane_id() == get_first_lane_id(lane_mask); -} - /// Conditional to indicate if this process is running on the GPU. LIBC_INLINE constexpr bool is_process_gpu() { #if defined(LIBC_TARGET_ARCH_IS_GPU) diff --git a/libc/src/assert/CMakeLists.txt b/libc/src/assert/CMakeLists.txt index 57ff9e97aa2635..cb81e3b68b1dce 100644 --- a/libc/src/assert/CMakeLists.txt +++ b/libc/src/assert/CMakeLists.txt @@ -1,12 +1,18 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) +else() + add_subdirectory(generic) +endif() + +if(TARGET libc.src.assert.${LIBC_TARGET_OS}.__assert_fail) + set(assert_fail_dep libc.src.assert.${LIBC_TARGET_OS}.__assert_fail) +else() + set(assert_fail_dep libc.src.assert.generic.__assert_fail) +endif() + add_entrypoint_object( __assert_fail - SRCS - __assert_fail.cpp - HDRS - __assert_fail.h - assert.h + ALIAS DEPENDS - libc.include.assert - libc.src.__support.OSUtil.osutil - libc.src.stdlib.abort + ${assert_fail_dep} ) diff --git a/libc/src/assert/generic/CMakeLists.txt b/libc/src/assert/generic/CMakeLists.txt new file mode 100644 index 00000000000000..387ab32be2741c --- /dev/null +++ b/libc/src/assert/generic/CMakeLists.txt @@ -0,0 +1,12 @@ +add_entrypoint_object( + __assert_fail + SRCS + __assert_fail.cpp + HDRS + ../__assert_fail.h + ../assert.h + DEPENDS + libc.include.assert + libc.src.__support.OSUtil.osutil + libc.src.stdlib.abort +) diff --git a/libc/src/assert/__assert_fail.cpp b/libc/src/assert/generic/__assert_fail.cpp similarity index 100% rename from libc/src/assert/__assert_fail.cpp rename to libc/src/assert/generic/__assert_fail.cpp diff --git a/libc/src/assert/gpu/CMakeLists.txt b/libc/src/assert/gpu/CMakeLists.txt new file mode 100644 index 00000000000000..3a4a0c7d10cfbe --- /dev/null +++ b/libc/src/assert/gpu/CMakeLists.txt @@ -0,0 +1,14 @@ +add_entrypoint_object( + __assert_fail + SRCS + 
__assert_fail.cpp + HDRS + ../__assert_fail.h + ../assert.h + DEPENDS + libc.include.assert + libc.src.__support.OSUtil.osutil + libc.src.__support.GPU.utils + libc.src.__support.CPP.atomic + libc.src.stdlib.abort +) diff --git a/libc/src/assert/gpu/__assert_fail.cpp b/libc/src/assert/gpu/__assert_fail.cpp new file mode 100644 index 00000000000000..b8ee168b069d16 --- /dev/null +++ b/libc/src/assert/gpu/__assert_fail.cpp @@ -0,0 +1,45 @@ +//===-- GPU definition of a libc internal assert macro ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/assert/__assert_fail.h" + +#include "src/__support/CPP/atomic.h" +#include "src/__support/GPU/utils.h" +#include "src/__support/libc_assert.h" +#include "src/stdlib/abort.h" + +namespace __llvm_libc { + +// A single-use lock to allow only a single thread to print the assertion. +static cpp::Atomic lock = 0; + +LLVM_LIBC_FUNCTION(void, __assert_fail, + (const char *assertion, const char *file, unsigned line, + const char *function)) { + uint64_t mask = gpu::get_lane_mask(); + // We only want a single work group or warp to handle the assertion. Each + // group attempts to claim the lock, if it is already claimed we simply exit. + uint32_t claimed = gpu::is_first_lane(mask) + ? !lock.fetch_or(1, cpp::MemoryOrder::ACQUIRE) + : 0; + if (!gpu::broadcast_value(mask, claimed)) { +#if defined(LIBC_TARGET_ARCH_IS_NVPTX) + LIBC_INLINE_ASM("exit;" ::: "memory"); +#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) + __builtin_amdgcn_endpgm(); +#endif + __builtin_unreachable(); + } + + // Only a single line should be printed if an assertion is hit. 
+ if (gpu::is_first_lane(mask)) + __llvm_libc::report_assertion_failure(assertion, file, line, function); + __llvm_libc::abort(); +} + +} // namespace __llvm_libc