From 39de5a2527b297ba79c625993a49b28c3baf5b00 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 4 Jul 2024 06:49:06 +1000 Subject: [PATCH] Refactor from_arrow_device/host to use resource_ref (#16160) Fixes #16159 Also fixes typos / leftovers in dictionary `add_keys` copydocs. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Paul Mattione (https://github.com/pmattione-nvidia) - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16160 --- .../cudf/dictionary/detail/update_keys.hpp | 10 +++---- cpp/include/cudf/interop.hpp | 29 ++++++++++--------- cpp/src/interop/from_arrow_device.cu | 27 ++++++++--------- cpp/src/interop/from_arrow_host.cu | 19 ++++++------ cpp/src/interop/from_arrow_stream.cu | 6 ++-- 5 files changed, 47 insertions(+), 44 deletions(-) diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index e8486a80afc..9cdda773dbb 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -29,7 +29,7 @@ namespace dictionary { namespace detail { /** * @copydoc cudf::dictionary::add_keys(dictionary_column_view const&,column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -40,7 +40,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column /** * @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -51,7 +51,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col /** * @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -61,7 +61,7 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction /** * @copydoc cudf::dictionary::set_keys(dictionary_column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -72,7 +72,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column /** * @copydoc - * cudf::dictionary::match_dictionaries(std::vector,mm::mr::device_memory_resource*) + * cudf::dictionary::match_dictionaries(std::vector,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 502ffb9ba4f..11f6ce2bad7 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -39,6 +39,7 @@ #include #include +#include #include @@ -372,8 +373,8 @@ std::unique_ptr from_arrow( std::unique_ptr from_arrow( ArrowSchema const* schema, ArrowArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::column` from a given ArrowArray and ArrowSchema input @@ -391,8 +392,8 @@ std::unique_ptr from_arrow( std::unique_ptr from_arrow_column( ArrowSchema const* schema, ArrowArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::table` from given ArrowDeviceArray input @@ -415,8 +416,8 @@ std::unique_ptr from_arrow_column( std::unique_ptr from_arrow_host( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::table` from given ArrowArrayStream input @@ -433,8 +434,8 @@ std::unique_ptr
from_arrow_host( */ std::unique_ptr
from_arrow_stream( ArrowArrayStream* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::column` from given ArrowDeviceArray input @@ -456,8 +457,8 @@ std::unique_ptr
from_arrow_stream( std::unique_ptr from_arrow_host_column( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray @@ -537,8 +538,8 @@ using unique_table_view_t = unique_table_view_t from_arrow_device( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter @@ -580,8 +581,8 @@ using unique_column_view_t = unique_column_view_t from_arrow_device_column( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu index 73c1a474310..e1d289e67a3 100644 --- a/cpp/src/interop/from_arrow_device.cu +++ b/cpp/src/interop/from_arrow_device.cu @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -56,7 +57,7 @@ struct dispatch_from_arrow_device { data_type, bool, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref) { CUDF_FAIL("Unsupported type in from_arrow_device", cudf::data_type_error); } @@ -68,7 +69,7 @@ struct dispatch_from_arrow_device { data_type type, bool skip_mask, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref mr) { size_type const num_rows = input->length; size_type const offset = input->offset; @@ -90,7 +91,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template <> dispatch_tuple_t dispatch_from_arrow_device::operator()(ArrowSchemaView* schema, @@ -98,7 +99,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()(ArrowSchemaView* s data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input->length == 0) { return std::make_tuple( @@ -141,7 +142,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema->type != NANOARROW_TYPE_LARGE_STRING, "Large strings are not yet supported in from_arrow_device", @@ -182,7 +183,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { ArrowSchemaView keys_schema_view; NANOARROW_THROW_NOT_OK( @@ -238,7 +239,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector children; owned_columns_t out_owned_cols; @@ -283,7 +284,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { size_type const num_rows = input->length; size_type const offset = input->offset; @@ -324,7 +325,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type.id() != type_id::EMPTY ? std::move(type_dispatcher( @@ -342,7 +343,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, unique_table_view_t from_arrow_device(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -397,7 +398,7 @@ unique_table_view_t from_arrow_device(ArrowSchema const* schema, unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -429,7 +430,7 @@ unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, unique_table_view_t from_arrow_device(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -439,7 +440,7 @@ unique_table_view_t from_arrow_device(ArrowSchema const* schema, unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index b7e07056686..b3087dedf98 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ namespace { struct dispatch_copy_from_arrow_host { rmm::cuda_stream_view stream; - rmm::mr::device_memory_resource* mr; + rmm::device_async_resource_ref mr; std::unique_ptr get_mask_buffer(ArrowArray const* array) { @@ -131,7 +132,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSchemaView* schema, @@ -388,7 +389,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type.id() != type_id::EMPTY ? std::move(type_dispatcher( @@ -405,7 +406,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, std::unique_ptr
from_arrow_host(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -441,7 +442,7 @@ std::unique_ptr
from_arrow_host(ArrowSchema const* schema, std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -462,7 +463,7 @@ std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, std::unique_ptr
from_arrow_host(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -472,7 +473,7 @@ std::unique_ptr
from_arrow_host(ArrowSchema const* schema, std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -482,7 +483,7 @@ std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, std::unique_ptr
from_arrow(ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -497,7 +498,7 @@ std::unique_ptr
from_arrow(ArrowSchema const* schema, std::unique_ptr from_arrow_column(ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/interop/from_arrow_stream.cu b/cpp/src/interop/from_arrow_stream.cu index 0c85b561944..578105aa90a 100644 --- a/cpp/src/interop/from_arrow_stream.cu +++ b/cpp/src/interop/from_arrow_stream.cu @@ -41,7 +41,7 @@ namespace { std::unique_ptr make_empty_column_from_schema(ArrowSchema const* schema, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { ArrowSchemaView schema_view; NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); @@ -81,7 +81,7 @@ std::unique_ptr make_empty_column_from_schema(ArrowSchema const* schema, std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(input != nullptr, "input ArrowArrayStream must not be NULL", std::invalid_argument); @@ -135,7 +135,7 @@ std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::from_arrow_stream(input, stream, mr);