From 5cc021af0ef934ddf3f5f66cee2d8dd2490ba623 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:51:26 -0500 Subject: [PATCH] Use offsetalator in cudf::strings::copy_slice (#14844) Replace hardcoded offset types with offsetalator in `cudf::strings::detail::copy_slice`. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14844 --- cpp/src/strings/copying/copying.cu | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 4f37d3864ac..013028d6df3 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -16,9 +16,10 @@ #include #include -#include #include +#include #include +#include #include #include @@ -33,47 +34,49 @@ namespace cudf { namespace strings { namespace detail { -std::unique_ptr copy_slice(strings_column_view const& strings, +std::unique_ptr copy_slice(strings_column_view const& input, size_type start, size_type end, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (strings.is_empty()) return make_empty_column(type_id::STRING); - if (end < 0 || end > strings.size()) end = strings.size(); + if (input.is_empty()) { return make_empty_column(type_id::STRING); } CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid start parameter value."); auto const strings_count = end - start; - auto const offsets_offset = start + strings.offset(); + auto const offsets_offset = start + input.offset(); // slice the offsets child column auto offsets_column = std::make_unique( cudf::detail::slice( - strings.offsets(), {offsets_offset, offsets_offset + strings_count + 1}, stream) + input.offsets(), {offsets_offset, offsets_offset + strings_count + 1}, stream) .front(), stream, mr); auto const chars_offset = - offsets_offset == 0 ? 0 : cudf::detail::get_value(offsets_column->view(), 0, stream); + offsets_offset == 0 ? 0L : get_offset_value(offsets_column->view(), 0, stream); if (chars_offset > 0) { // adjust the individual offset values only if needed - auto d_offsets = offsets_column->mutable_view(); + auto d_offsets = + cudf::detail::offsetalator_factory::make_output_iterator(offsets_column->mutable_view()); + auto input_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), offsets_offset); thrust::transform(rmm::exec_policy(stream), - d_offsets.begin(), - d_offsets.end(), - d_offsets.begin(), - cuda::proclaim_return_type( + input_offsets, + input_offsets + offsets_column->size(), + d_offsets, + cuda::proclaim_return_type( [chars_offset] __device__(auto offset) { return offset - chars_offset; })); } // slice the chars child column - auto const data_size = static_cast( - cudf::detail::get_value(offsets_column->view(), strings_count, stream)); + auto const data_size = + static_cast(get_offset_value(offsets_column->view(), strings_count, stream)); auto chars_buffer = - rmm::device_buffer{strings.chars_begin(stream) + chars_offset, data_size, stream, mr}; + rmm::device_buffer{input.chars_begin(stream) + chars_offset, data_size, stream, mr}; // slice the null mask auto null_mask = cudf::detail::copy_bitmask( - strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr); + input.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr); auto null_count = cudf::detail::null_count( static_cast(null_mask.data()), 0, strings_count, stream);