Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register the memory mapped buffer in datasource to improve H2D throughput #13814

Merged
merged 17 commits into from
Aug 16, 2023
Merged
72 changes: 67 additions & 5 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include <sys/mman.h>
#include <unistd.h>

#include <unordered_map>

namespace cudf {
namespace io {
namespace {
Expand Down Expand Up @@ -104,6 +106,27 @@ class file_source : public datasource {
static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB
};

/**
* @brief Memoized pageableMemoryAccessUsesHostPageTables device property.
*/
[[nodiscard]] bool pageableMemoryAccessUsesHostPageTables()
{
static std::unordered_map<int, bool> result_cache{};

int deviceId{};
cudaGetDevice(&deviceId);
vuule marked this conversation as resolved.
Show resolved Hide resolved

if (result_cache.find(deviceId) == result_cache.end()) {
cudaDeviceProp props{};
cudaGetDeviceProperties(&props, deviceId);
vuule marked this conversation as resolved.
Show resolved Hide resolved
result_cache[deviceId] = (props.pageableMemoryAccessUsesHostPageTables == 1);
CUDF_LOG_INFO(
"Device {} pageableMemoryAccessUsesHostPageTables: {}", deviceId, result_cache[deviceId]);
}

return result_cache[deviceId];
}

/**
* @brief Implementation class for reading from a file using memory mapped access.
*
Expand All @@ -115,12 +138,18 @@ class memory_mapped_source : public file_source {
/**
 * @brief Opens the file and, unless it is empty, memory maps the requested range.
 *
 * After a mapping is created, `register_mmap_buffer()` is called to attempt page-locking the
 * mapped range.
 *
 * @param filepath Path forwarded to the `file_source` base class
 * @param offset Offset forwarded to `map()`
 * @param size Size forwarded to `map()`
 */
explicit memory_mapped_source(char const* filepath, size_t offset, size_t size)
  : file_source(filepath)
{
  // Map exactly once: a second map() call would overwrite the bookkeeping of the first
  // mapping and leak it. Empty files are never mapped.
  if (_file.size() != 0) {
    map(_file.desc(), offset, size);
    register_mmap_buffer();
  }
}

/**
 * @brief Releases the memory mapped range, unregistering it from CUDA first if needed.
 */
~memory_mapped_source() override
{
  if (_map_addr != nullptr) {
    // Order matters: the range must still be mapped when it is unregistered, so undo the
    // registration before munmap() invalidates the address range.
    unregister_mmap_buffer();
    munmap(_map_addr, _map_size);
  }
}

std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
Expand All @@ -147,6 +176,38 @@ class memory_mapped_source : public file_source {
}

private:
/**
 * @brief Attempts to page-lock (register) the mapped file range with the CUDA runtime.
 *
 * Does nothing when there is no mapping or when the current device does not report
 * `pageableMemoryAccessUsesHostPageTables`. A failed registration is logged as a warning and
 * otherwise ignored; `_is_map_registered` is set only on success.
 *
 * Fixes nvbugs/4215160
 */
void register_mmap_buffer()
{
  bool const has_mapping = (_map_addr != nullptr) and (_map_size != 0);
  if (not has_mapping) { return; }

  // Queried only once a mapping is known to exist.
  if (not pageableMemoryAccessUsesHostPageTables()) { return; }

  auto const status = cudaHostRegister(_map_addr, _map_size, cudaHostRegisterDefault);
  if (status != cudaSuccess) {
    CUDF_LOG_WARN("cudaHostRegister failed with {} ({})", status, cudaGetErrorString(status));
    return;
  }

  _is_map_registered = true;
}

/**
 * @brief Unregisters the memory range of the mapped file.
 *
 * No-op unless a previous `cudaHostRegister` call on the mapping succeeded. A failure to
 * unregister is logged as a warning. The registration flag is cleared in either case, so
 * calling this function again does nothing.
 */
void unregister_mmap_buffer()
{
  if (not _is_map_registered) { return; }

  auto const result = cudaHostUnregister(_map_addr);
  if (result != cudaSuccess) {
    CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})", result, cudaGetErrorString(result));
  }

  // Clear the flag even on failure: the same pointer must not be unregistered a second time.
  _is_map_registered = false;
}

void map(int fd, size_t offset, size_t size)
{
CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file");
Expand All @@ -165,9 +226,10 @@ class memory_mapped_source : public file_source {
}

private:
size_t _map_size        = 0;        // Size passed to munmap / cudaHostRegister for the mapping
size_t _map_offset      = 0;        // Offset associated with the mapping
void* _map_addr         = nullptr;  // Start of the mapping; nullptr when nothing is mapped
bool _is_map_registered = false;    // True only after cudaHostRegister succeeded on the mapping
};

/**
Expand Down