From 9d352ef861ed77277753ee6531d2fd69b177c532 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Thu, 12 Sep 2024 02:56:14 -0400 Subject: [PATCH] Intentionally leak static CUDA resources to avoid crash (part 2) (#462) The NVbench application `PARQUET_READER_NVBENCH` in libcudf currently crashes with a segmentation fault. To reproduce: ``` ./PARQUET_READER_NVBENCH -d 0 -b 1 --run-once -a io_type=FILEPATH -a compression_type=SNAPPY -a cardinality=0 -a run_length=1 ``` The root cause is that (1) some `thread_local` objects on the main thread in `libcudf` and (2) some `static` objects in `kvikio` are destroyed upon program termination, after NVbench has already called `cudaDeviceReset()`. These objects should simply be leaked, since having their destructors make CUDA calls upon program termination constitutes undefined behavior (UB) in CUDA. This simple PR is the kvikIO side of the fix. The other part is done in https://github.com/rapidsai/cudf/pull/16787. Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/kvikio/pull/462 --- cpp/include/kvikio/posix_io.hpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index 9e88a3e265..9a28e06eec 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -42,16 +42,14 @@ class StreamsByThread { public: StreamsByThread() = default; - ~StreamsByThread() noexcept - { - for (auto& [_, stream] : _streams) { - try { - CUDA_DRIVER_TRY(cudaAPI::instance().StreamDestroy(stream)); - } catch (const CUfileException& e) { - std::cerr << e.what() << std::endl; - } - } - } + + // Here we intentionally do not destroy in the destructor the CUDA resources + // (e.g. CUstream) with static storage duration, but instead let them leak + // on program termination. This is to prevent undefined behavior in CUDA. 
See + // <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization> + // This also prevents a crash (segmentation fault) if clients call + // cuDevicePrimaryCtxReset() or cudaDeviceReset() before program termination. + ~StreamsByThread() = default; static CUstream get(CUcontext ctx, std::thread::id thd_id) {