From 3f995695672945060db6989370329646067cb126 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Wed, 16 Aug 2023 14:46:54 -0700
Subject: [PATCH] Register the memory mapped buffer in `datasource` to improve
 H2D throughput (#13814)

On systems where pageable memory uses host page tables, `cudaHostRegister` is very cheap. Since host buffer registration can improve throughput, datasource now registers the entire memory mapped buffer when host page tables are used. This mainly impacts the CSV reader, which reads input files using a `host_read` call.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/13814
---
 cpp/src/io/utilities/datasource.cpp | 72 +++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index 6186d9d9736..7a7121aa91d 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -32,6 +32,8 @@
 #include
 #include
 
+#include <unordered_map>
+
 namespace cudf {
 namespace io {
 namespace {
@@ -107,6 +109,27 @@ class file_source : public datasource {
   static constexpr size_t _gds_read_preferred_threshold = 128 << 10;  // 128KB
 };
 
+/**
+ * @brief Memoized pageableMemoryAccessUsesHostPageTables device property.
+ */
+[[nodiscard]] bool pageableMemoryAccessUsesHostPageTables()
+{
+  static std::unordered_map<int, bool> result_cache{};
+
+  int deviceId{};
+  CUDF_CUDA_TRY(cudaGetDevice(&deviceId));
+
+  if (result_cache.find(deviceId) == result_cache.end()) {
+    cudaDeviceProp props{};
+    CUDF_CUDA_TRY(cudaGetDeviceProperties(&props, deviceId));
+    result_cache[deviceId] = (props.pageableMemoryAccessUsesHostPageTables == 1);
+    CUDF_LOG_INFO(
+      "Device {} pageableMemoryAccessUsesHostPageTables: {}", deviceId, result_cache[deviceId]);
+  }
+
+  return result_cache[deviceId];
+}
+
 /**
  * @brief Implementation class for reading from a file using memory mapped access.
  *
@@ -118,12 +141,18 @@ class memory_mapped_source : public file_source {
   explicit memory_mapped_source(char const* filepath, size_t offset, size_t size)
     : file_source(filepath)
   {
-    if (_file.size() != 0) map(_file.desc(), offset, size);
+    if (_file.size() != 0) {
+      map(_file.desc(), offset, size);
+      register_mmap_buffer();
+    }
   }
 
   ~memory_mapped_source() override
   {
-    if (_map_addr != nullptr) { munmap(_map_addr, _map_size); }
+    if (_map_addr != nullptr) {
+      unregister_mmap_buffer();  // unpin before munmap; the range must still be mapped
+      munmap(_map_addr, _map_size);
+    }
   }
 
   std::unique_ptr host_read(size_t offset, size_t size) override
@@ -150,6 +179,38 @@ class memory_mapped_source : public file_source {
   }
 
  private:
+  /**
+   * @brief Page-locks (registers) the memory range of the mapped file.
+   *
+   * Fixes nvbugs/4215160
+   */
+  void register_mmap_buffer()
+  {
+    if (_map_addr == nullptr or _map_size == 0 or not pageableMemoryAccessUsesHostPageTables()) {
+      return;
+    }
+
+    auto const result = cudaHostRegister(_map_addr, _map_size, cudaHostRegisterDefault);
+    if (result == cudaSuccess) {
+      _is_map_registered = true;
+    } else {
+      CUDF_LOG_WARN("cudaHostRegister failed with {} ({})", result, cudaGetErrorString(result));
+    }
+  }
+
+  /**
+   * @brief Unregisters the memory range of the mapped file.
+   */
+  void unregister_mmap_buffer()
+  {
+    if (not _is_map_registered) { return; }
+
+    auto const result = cudaHostUnregister(_map_addr);
+    if (result != cudaSuccess) {
+      CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})", result, cudaGetErrorString(result));
+    }
+  }
+
   void map(int fd, size_t offset, size_t size)
   {
     CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file");
@@ -168,9 +229,10 @@ class memory_mapped_source : public file_source {
   }
 
  private:
-  size_t _map_size   = 0;
-  size_t _map_offset = 0;
-  void* _map_addr    = nullptr;
+  size_t _map_size        = 0;
+  size_t _map_offset      = 0;
+  void* _map_addr         = nullptr;
+  bool _is_map_registered = false;
 };
 
 /**