Skip to content

Commit

Permalink
Register the memory mapped buffer in datasource to improve H2D throughput (#13814)
Browse files Browse the repository at this point in the history

On systems where pageable memory uses host page tables, `cudaHostRegister` is very cheap. Since host buffer registration can improve throughput, datasource now registers the entire memory mapped buffer when host page tables are used.

This mainly impacts the CSV reader, which reads input files using a `host_read` call.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: #13814
  • Loading branch information
vuule authored Aug 16, 2023
1 parent fdeabab commit 3f99569
Showing 1 changed file with 67 additions and 5 deletions.
72 changes: 67 additions & 5 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include <sys/mman.h>
#include <unistd.h>

#include <unordered_map>

namespace cudf {
namespace io {
namespace {
Expand Down Expand Up @@ -107,6 +109,27 @@ class file_source : public datasource {
static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB
};

/**
* @brief Memoized pageableMemoryAccessUsesHostPageTables device property.
*/
[[nodiscard]] bool pageableMemoryAccessUsesHostPageTables()
{
static std::unordered_map<int, bool> result_cache{};

int deviceId{};
CUDF_CUDA_TRY(cudaGetDevice(&deviceId));

if (result_cache.find(deviceId) == result_cache.end()) {
cudaDeviceProp props{};
CUDF_CUDA_TRY(cudaGetDeviceProperties(&props, deviceId));
result_cache[deviceId] = (props.pageableMemoryAccessUsesHostPageTables == 1);
CUDF_LOG_INFO(
"Device {} pageableMemoryAccessUsesHostPageTables: {}", deviceId, result_cache[deviceId]);
}

return result_cache[deviceId];
}

/**
* @brief Implementation class for reading from a file using memory mapped access.
*
Expand All @@ -118,12 +141,18 @@ class memory_mapped_source : public file_source {
/**
 * @brief Opens the file and, when it is not empty, memory maps the requested range.
 *
 * After mapping, `register_mmap_buffer()` is called to attempt registration of the mapped range.
 * An empty file is left unmapped and unregistered.
 *
 * @param filepath Path forwarded to the `file_source` base class
 * @param offset Offset forwarded to `map()`
 * @param size Size forwarded to `map()`
 */
explicit memory_mapped_source(char const* filepath, size_t offset, size_t size)
  : file_source(filepath)
{
  // Map exactly once, then attempt registration of the mapped range.
  if (_file.size() != 0) {
    map(_file.desc(), offset, size);
    register_mmap_buffer();
  }
}

/**
 * @brief Unregisters the mapped range (if it was registered) and then unmaps it.
 */
~memory_mapped_source() override
{
  if (_map_addr != nullptr) {
    // Unregister while the range is still mapped: `cudaHostUnregister` has to be given memory
    // that is still valid, so it must run before `munmap` releases the mapping.
    unregister_mmap_buffer();
    munmap(_map_addr, _map_size);
  }
}

std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
Expand All @@ -150,6 +179,38 @@ class memory_mapped_source : public file_source {
}

private:
/**
* @brief Page-locks (registers) the memory range of the mapped file.
*
* Fixes nvbugs/4215160
*/
void register_mmap_buffer()
{
if (_map_addr == nullptr or _map_size == 0 or not pageableMemoryAccessUsesHostPageTables()) {
return;
}

auto const result = cudaHostRegister(_map_addr, _map_size, cudaHostRegisterDefault);
if (result == cudaSuccess) {
_is_map_registered = true;
} else {
CUDF_LOG_WARN("cudaHostRegister failed with {} ({})", result, cudaGetErrorString(result));
}
}

/**
* @brief Unregisters the memory range of the mapped file.
*/
void unregister_mmap_buffer()
{
if (not _is_map_registered) { return; }

auto const result = cudaHostUnregister(_map_addr);
if (result != cudaSuccess) {
CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})", result, cudaGetErrorString(result));
}
}

void map(int fd, size_t offset, size_t size)
{
CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file");
Expand All @@ -168,9 +229,10 @@ class memory_mapped_source : public file_source {
}

private:
size_t _map_size        = 0;        // Length given to `munmap` and `cudaHostRegister`
size_t _map_offset      = 0;
void* _map_addr         = nullptr;  // Start of the mapping; nullptr when nothing is mapped
bool _is_map_registered = false;    // Set once `cudaHostRegister` succeeds on the mapping
};

/**
Expand Down

0 comments on commit 3f99569

Please sign in to comment.