Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.06' into ref/rangein…
Browse files Browse the repository at this point in the history
…dex/attrs
  • Loading branch information
mroeschke committed Apr 23, 2024
2 parents 0458b1c + 8db1851 commit 6a6d30e
Show file tree
Hide file tree
Showing 23 changed files with 1,739 additions and 228 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ add_library(
src/interop/from_arrow.cu
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/from_arrow_device.cu
src/interop/to_arrow_schema.cpp
src/interop/to_arrow_utilities.cpp
src/interop/detail/arrow_allocator.cpp
Expand Down
21 changes: 16 additions & 5 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ static std::string cuio_host_mem_param{
* Initializes the default memory resource to use the RMM pool device resource.
*/
struct nvbench_base_fixture {
using host_pooled_mr_t = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;

// Builds a shared_ptr that owns a newly constructed rmm::mr::cuda_memory_resource.
inline auto make_cuda()
{
  auto cuda_resource = std::make_shared<rmm::mr::cuda_memory_resource>();
  return cuda_resource;
}

inline auto make_pool()
Expand Down Expand Up @@ -90,12 +92,14 @@ struct nvbench_base_fixture {

inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
{
using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
size_t{1} * 1024 * 1024 * 1024);
if (!this->host_pooled_mr) {
// Don't store in static, as the CUDA context may be destroyed before static destruction
this->host_pooled_mr = std::make_shared<host_pooled_mr_t>(
std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
size_t{1} * 1024 * 1024 * 1024);
}

return *mr;
return *this->host_pooled_mr;
}

inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
Expand Down Expand Up @@ -126,9 +130,16 @@ struct nvbench_base_fixture {
std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

// Destructor: switches cuIO's host memory resource to the one returned by
// make_cuio_host_pinned() before the fixture's members are destroyed.
~nvbench_base_fixture()
{
// Ensure the pool is freed before the CUDA context is destroyed:
// NOTE(review): presumably this stops cuIO from using host_pooled_mr so that the
// pool can be released together with this fixture -- confirm against cudf::io.
cudf::io::set_host_memory_resource(this->make_cuio_host_pinned());
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
std::string rmm_mode{"pool"};

std::shared_ptr<host_pooled_mr_t> host_pooled_mr;
std::string cuio_host_mode{"pinned"};
};

Expand Down
47 changes: 31 additions & 16 deletions cpp/benchmarks/fixture/nvbench_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,44 @@
*/

#include <benchmarks/fixture/nvbench_fixture.hpp>
#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture

#include <nvbench/main.cuh>

#include <string>
#include <vector>

namespace cudf {

// strip off the rmm_mode and cuio_host_mem parameters before passing the
// remaining arguments to nvbench::option_parser
#undef NVBENCH_MAIN_PARSE
#define NVBENCH_MAIN_PARSE(argc, argv) \
nvbench::option_parser parser; \
std::vector<std::string> m_args; \
for (int i = 0; i < argc; ++i) { \
std::string arg = argv[i]; \
if (arg == cudf::detail::rmm_mode_param) { \
i += 2; \
} else if (arg == cudf::detail::cuio_host_mem_param) { \
i += 2; \
} else { \
m_args.push_back(arg); \
} \
} \
parser.parse(m_args)
/**
 * @brief Strips the cudf-specific options from the benchmark argument list.
 *
 * Removes every occurrence of the `rmm_mode` and `cuio_host_mem` option flags, together with
 * the argument that follows each flag, so that only the remaining arguments are left in `args`.
 * The relative order of the remaining arguments is preserved.
 *
 * @param[in,out] args Command-line arguments; holds the filtered list on return
 */
void benchmark_arg_handler(std::vector<std::string>& args)
{
  std::vector<std::string> kept_args;
  kept_args.reserve(args.size());  // upper bound, avoids reallocation while filtering

  for (std::size_t i = 0; i < args.size(); ++i) {
    // Bind by reference: the string is copied only if it is actually kept.
    std::string const& arg = args[i];
    if (arg == cudf::detail::rmm_mode_param || arg == cudf::detail::cuio_host_mem_param) {
      ++i;  // skip the value that follows the flag
    } else {
      kept_args.push_back(arg);
    }
  }

  // Hand the filtered list back without copying it a second time.
  args.swap(kept_args);
}

} // namespace cudf

// Install arg handler
#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER
#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) cudf::benchmark_arg_handler(args)

// Global fixture setup:
#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST
#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \
[[maybe_unused]] auto env_state = cudf::nvbench_base_fixture(argc, argv);

// this declares/defines the main() function using the definitions above
NVBENCH_MAIN
29 changes: 0 additions & 29 deletions cpp/cmake/thirdparty/patches/nvbench_global_setup.diff

This file was deleted.

9 changes: 2 additions & 7 deletions cpp/cmake/thirdparty/patches/nvbench_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,8 @@
{
"packages" : {
"nvbench" : {
"patches" : [
{
"file" : "${current_json_dir}/nvbench_global_setup.diff",
"issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]",
"fixed_in" : ""
}
]
"git_url": "https://github.com/NVIDIA/nvbench.git",
"git_tag": "555d628e9b250868c9da003e4407087ff1982e8e"
}
}
}
124 changes: 124 additions & 0 deletions cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,5 +348,129 @@ std::unique_ptr<cudf::scalar> from_arrow(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
*
*/
using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;

/**
* @brief Functor used as the custom deleter of a unique_ptr to a view type
*
* When converting from an ArrowDeviceArray, there are cases where data can't
* be zero-copy (e.g. bools or non-UINT32 dictionary indices). This custom deleter
* is used to maintain ownership over the data allocated, since a view such as
* `cudf::table_view` or `cudf::column_view` doesn't hold ownership.
*
* @tparam ViewType Type of the view object the unique_ptr points to
*/
template <typename ViewType>
struct custom_view_deleter {
/**
* @brief Construct a new custom view deleter object
*
* @param owned Vector of owning columns; moved into the deleter
*/
explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}

/**
* @brief Deletes the view object held by the unique_ptr
*
* Only the view itself is deleted here; the columns in `owned_mem_` are destroyed
* together with the deleter object.
*
* @param ptr Pointer to the object to be deleted
*/
void operator()(ViewType* ptr) const { delete ptr; }

owned_columns_t owned_mem_; ///< Owned columns that must be deleted.
};

/**
* @brief typedef for a unique_ptr to a `cudf::table_view` with custom deleter
*
*/
using unique_table_view_t =
std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;

/**
* @brief Create `cudf::table_view` from given `ArrowDeviceArray` and `ArrowSchema`
*
* Constructs a non-owning `cudf::table_view` using `ArrowDeviceArray` and `ArrowSchema`,
* data must be accessible to the CUDA device. Because the resulting `cudf::table_view` will
* not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result.
* It is the responsibility of callers to ensure they call the release callback on the
* `ArrowDeviceArray` after it is no longer needed, and that the `cudf::table_view` is not
* accessed after this happens.
*
* @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST`
* or `ARROW_DEVICE_CUDA_MANAGED`
*
* @throws cudf::data_type_error if the input array is not a struct array, non-struct
* arrays should be passed to `from_arrow_device_column` instead.
*
* @throws cudf::data_type_error if the input arrow data type is not supported.
*
* Each child of the input struct becomes a column of the resulting table_view.
*
* @note The custom deleter used for the unique_ptr to the table_view maintains ownership
* over any memory which is allocated, such as converting boolean columns from the bitmap
* used by Arrow to the 1-byte per value for cudf.
*
* @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed
* to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called
* on it with the event. This function, however, will not explicitly synchronize on the
* stream.
*
* @param schema `ArrowSchema` pointer to object describing the type of the device array
* @param input `ArrowDeviceArray` pointer to object owning the Arrow data
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to perform any allocations
* @return `cudf::table_view` generated from given Arrow data
*/
unique_table_view_t from_arrow_device(
ArrowSchema const* schema,
ArrowDeviceArray const* input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter
*
*/
using unique_column_view_t =
std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;

/**
* @brief Create `cudf::column_view` from given `ArrowDeviceArray` and `ArrowSchema`
*
* Constructs a non-owning `cudf::column_view` using `ArrowDeviceArray` and `ArrowSchema`,
* data must be accessible to the CUDA device. Because the resulting `cudf::column_view` will
* not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result.
* It is the responsibility of callers to ensure they call the release callback on the
* `ArrowDeviceArray` after it is no longer needed, and that the `cudf::column_view` is not
* accessed after this happens.
*
* @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST`
* or `ARROW_DEVICE_CUDA_MANAGED`
*
* @throws cudf::data_type_error if the input arrow data type is not supported.
*
* @note The custom deleter used for the unique_ptr to the column_view maintains ownership
* over any memory which is allocated, such as converting boolean columns from the bitmap
* used by Arrow to the 1-byte per value for cudf.
*
* @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed
* to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called
* on it with the event. This function, however, will not explicitly synchronize on the
* stream.
*
* @param schema `ArrowSchema` pointer to object describing the type of the device array
* @param input `ArrowDeviceArray` pointer to object owning the Arrow data
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to perform any allocations
* @return `cudf::column_view` generated from given Arrow data
*/
unique_column_view_t from_arrow_device_column(
ArrowSchema const* schema,
ArrowDeviceArray const* input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
Loading

0 comments on commit 6a6d30e

Please sign in to comment.