Skip to content

Commit

Permalink
Use offsetalator in cudf::strings::copy_slice (#14844)
Browse files (browse the repository at this point in the history)
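Replace the hardcoded `int32_t` offset type with offsetalator iterators in `cudf::strings::detail::copy_slice`, so that the function no longer assumes the offsets child column holds 32-bit values.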

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Bradley Dice (https://github.com/bdice)

URL: #14844
  • Loading branch information
davidwendt authored Jan 29, 2024
1 parent 7fe9bc8 commit 5cc021a
Showing 1 changed file with 19 additions and 16 deletions.
35 changes: 19 additions & 16 deletions cpp/src/strings/copying/copying.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/strings/detail/copying.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -33,47 +34,49 @@ namespace cudf {
namespace strings {
namespace detail {

std::unique_ptr<cudf::column> copy_slice(strings_column_view const& strings,
std::unique_ptr<cudf::column> copy_slice(strings_column_view const& input,
size_type start,
size_type end,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (strings.is_empty()) return make_empty_column(type_id::STRING);
if (end < 0 || end > strings.size()) end = strings.size();
if (input.is_empty()) { return make_empty_column(type_id::STRING); }
CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid start parameter value.");
auto const strings_count = end - start;
auto const offsets_offset = start + strings.offset();
auto const offsets_offset = start + input.offset();

// slice the offsets child column
auto offsets_column = std::make_unique<cudf::column>(
cudf::detail::slice(
strings.offsets(), {offsets_offset, offsets_offset + strings_count + 1}, stream)
input.offsets(), {offsets_offset, offsets_offset + strings_count + 1}, stream)
.front(),
stream,
mr);
auto const chars_offset =
offsets_offset == 0 ? 0 : cudf::detail::get_value<int32_t>(offsets_column->view(), 0, stream);
offsets_offset == 0 ? 0L : get_offset_value(offsets_column->view(), 0, stream);
if (chars_offset > 0) {
// adjust the individual offset values only if needed
auto d_offsets = offsets_column->mutable_view();
auto d_offsets =
cudf::detail::offsetalator_factory::make_output_iterator(offsets_column->mutable_view());
auto input_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), offsets_offset);
thrust::transform(rmm::exec_policy(stream),
d_offsets.begin<int32_t>(),
d_offsets.end<int32_t>(),
d_offsets.begin<int32_t>(),
cuda::proclaim_return_type<int32_t>(
input_offsets,
input_offsets + offsets_column->size(),
d_offsets,
cuda::proclaim_return_type<int64_t>(
[chars_offset] __device__(auto offset) { return offset - chars_offset; }));
}

// slice the chars child column
auto const data_size = static_cast<std::size_t>(
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream));
auto const data_size =
static_cast<std::size_t>(get_offset_value(offsets_column->view(), strings_count, stream));
auto chars_buffer =
rmm::device_buffer{strings.chars_begin(stream) + chars_offset, data_size, stream, mr};
rmm::device_buffer{input.chars_begin(stream) + chars_offset, data_size, stream, mr};

// slice the null mask
auto null_mask = cudf::detail::copy_bitmask(
strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr);
input.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr);

auto null_count = cudf::detail::null_count(
static_cast<bitmask_type const*>(null_mask.data()), 0, strings_count, stream);
Expand Down

0 comments on commit 5cc021a

Please sign in to comment.