Skip to content

Commit

Permalink
Pinned vector factory that uses the global pool (#15895)
Browse files Browse the repository at this point in the history
closes #15612
Expanded the set of vector factories to cover pinned vectors. The functions return `cudf::detail::host_vector`, which use a type-erased allocator, allowing us to utilize the runtime configurable global pinned (previously host) resource.
The `pinned_host_vector` type has been removed as it can only support the non-pooled pinned allocations. Its use is not replaced with `cudf::detail::host_vector`.
Moved the global host (now pinned) resource out of cuIO and changed the type to host_device. User-specified resources are now required to allocate device-accessible memory. The name has been changed to pinned to reflect the new requirement.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Yunsong Wang (https://github.com/PointKernel)
  - Mark Harris (https://github.com/harrism)
  - David Wendt (https://github.com/davidwendt)

URL: #15895
  • Loading branch information
vuule authored Jun 12, 2024
1 parent d2cd1d4 commit f7ba6ab
Show file tree
Hide file tree
Showing 28 changed files with 487 additions and 637 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ add_library(
src/utilities/default_stream.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/pinned_memory.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
src/utilities/traits.cpp
Expand Down
13 changes: 7 additions & 6 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include <cudf/io/memory_resource.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
Expand Down Expand Up @@ -81,17 +81,18 @@ struct nvbench_base_fixture {
"\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
}

inline rmm::host_async_resource_ref make_cuio_host_pinned()
inline rmm::host_device_async_resource_ref make_cuio_host_pinned()
{
static std::shared_ptr<rmm::mr::pinned_host_memory_resource> mr =
std::make_shared<rmm::mr::pinned_host_memory_resource>();
return *mr;
}

inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource(
std::string const& mode)
{
if (mode == "pinned") return make_cuio_host_pinned();
if (mode == "pinned_pool") return cudf::io::get_host_memory_resource();
if (mode == "pinned_pool") return cudf::get_pinned_memory_resource();
CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
}

Expand All @@ -112,14 +113,14 @@ struct nvbench_base_fixture {
rmm::mr::set_current_device_resource(mr.get());
std::cout << "RMM memory resource = " << rmm_mode << "\n";

cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

~nvbench_base_fixture()
{
// Ensure the the pool is freed before the CUDA context is destroyed:
cudf::io::set_host_memory_resource(this->make_cuio_host_pinned());
cudf::set_pinned_memory_resource(this->make_cuio_host_pinned());
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
Expand Down
12 changes: 12 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <unistd.h>

#include <cstdio>
Expand All @@ -28,6 +31,14 @@

temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"};

// Don't use cudf's pinned pool for the source data
rmm::host_async_resource_ref pinned_memory_resource()
{
static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{};

return mr;
}

std::string random_file_in_dir(std::string const& dir_path)
{
// `mkstemp` modifies the template in place
Expand All @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path)

cuio_source_sink_pair::cuio_source_sink_pair(io_type type)
: type{type},
pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}),
d_buffer{0, cudf::get_default_stream()},
file_name{random_file_in_dir(tmpdir.path())},
void_sink{cudf::io::data_sink::create()}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <cudf_test/file_utilities.hpp>

#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>

Expand Down Expand Up @@ -79,7 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
cudf::detail::pinned_host_vector<char> pinned_buffer;
cudf::detail::host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/stream_pool.hpp>
#include <cudf/io/memory_resource.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/pinned_memory.hpp>
#include <cudf/utilities/thread_pool.hpp>

#include <nvtx3/nvtx3.hpp>
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,6 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand Down Expand Up @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state,

auto const delim_factor = static_cast<double>(delim_percent) / 100;
std::unique_ptr<cudf::io::datasource> datasource;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input = cudf::detail::pinned_host_vector<char>{};
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input =
cudf::detail::make_pinned_vector_async<char>(0, cudf::get_default_stream());

if (source_type != data_chunk_source_type::device &&
source_type != data_chunk_source_type::host_pinned) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/aligned.hpp>
#include <rmm/resource_ref.hpp>

#include <thrust/host_vector.h>
Expand All @@ -32,8 +33,6 @@ namespace cudf::detail {
/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c a `rmm::host_async_resource_ref` for allocation.
*
* This implementation is ported from pinned_host_vector in cudf.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <typename T>
Expand All @@ -42,8 +41,6 @@ class rmm_host_allocator;
/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c an `cudf::host_async_resource_ref` for allocation.
*
* This implementation is ported from pinned_host_vector in cudf.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <>
Expand All @@ -70,8 +67,7 @@ class rmm_host_allocator<void> {
* The \p rmm_host_allocator provides an interface for host memory allocation through the user
* provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of
* this reference and therefore it is the user's responsibility to ensure its lifetime for the
* duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from
* pinned_host_vector in cudf.
* duration of the lifetime of the \p rmm_host_allocator.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
Expand Down Expand Up @@ -121,8 +117,12 @@ class rmm_host_allocator {
inline pointer allocate(size_type cnt)
{
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
return static_cast<pointer>(
mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream));
auto const result =
mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
// Synchronize to ensure the memory is allocated before thrust::host_vector initialization
// TODO: replace thrust::host_vector with a type that does not require synchronization
stream.synchronize();
return static_cast<pointer>(result);
}

/**
Expand Down Expand Up @@ -182,6 +182,6 @@ class rmm_host_allocator {
* @brief A vector class with rmm host memory allocator
*/
template <typename T>
using rmm_host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;
using host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;

} // namespace cudf::detail
Loading

0 comments on commit f7ba6ab

Please sign in to comment.