Skip to content

Commit

Permalink
Deprecate strings_column_view::offsets_begin() (#15205)
Browse files Browse the repository at this point in the history
Deprecates the `cudf::strings_column_view::offsets_begin()` and `cudf::strings_column_view::offsets_end()` since they are hardcoded to return `size_type*`. There are very few places that used these functions.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #15205
  • Loading branch information
davidwendt authored Mar 6, 2024
1 parent dbf7236 commit ab20f47
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 30 deletions.
8 changes: 6 additions & 2 deletions cpp/include/cudf/strings/strings_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,24 @@ class strings_column_view : private column_view {
/**
* @brief Return an iterator for the offsets child column.
*
* @deprecated Since 24.04
*
* This automatically applies the offset of the parent.
*
* @return Iterator pointing to the first offset value.
*/
[[nodiscard]] offset_iterator offsets_begin() const;
[[deprecated]] offset_iterator offsets_begin() const;

/**
* @brief Return an end iterator for the offsets child column.
*
* @deprecated Since 24.04
*
* This automatically applies the offset of the parent.
*
* @return Iterator pointing 1 past the last offset value.
*/
[[nodiscard]] offset_iterator offsets_end() const;
[[deprecated]] offset_iterator offsets_end() const;

/**
* @brief Returns the number of bytes in the chars child column.
Expand Down
13 changes: 6 additions & 7 deletions cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -302,17 +302,16 @@ std::unique_ptr<column> replace_character_parallel(strings_column_view const& in
auto string_indices = rmm::device_uvector<size_type>(target_count, stream);

auto const pos_itr = cudf::detail::make_counting_transform_iterator(
0, cuda::proclaim_return_type<size_type>([d_positions] __device__(auto idx) -> size_type {
0, cuda::proclaim_return_type<int64_t>([d_positions] __device__(auto idx) -> int64_t {
return d_positions[idx].first;
}));
auto pos_count = std::distance(d_positions, copy_end);

thrust::upper_bound(rmm::exec_policy(stream),
input.offsets_begin(),
input.offsets_end(),
pos_itr,
pos_itr + pos_count,
string_indices.begin());
auto begin =
cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());
auto end = begin + input.offsets().size();
thrust::upper_bound(
rmm::exec_policy(stream), begin, end, pos_itr, pos_itr + pos_count, string_indices.begin());

// compute offsets per string
auto targets_offsets = rmm::device_uvector<size_type>(strings_count + 1, stream);
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/strings/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,10 @@ std::unique_ptr<column> replace_char_parallel(strings_column_view const& strings
{
auto const strings_count = strings.size();
auto const offset_count = strings_count + 1;
auto const d_offsets = strings.offsets_begin();
auto const d_in_chars = strings.chars_begin(stream);
auto const chars_bytes = chars_end - chars_start;
auto const target_size = d_target.size_bytes();
auto const d_offsets = strings.offsets().begin<int32_t>() + strings.offset(); // TODO: PR 14824
auto const d_in_chars = strings.chars_begin(stream);
auto const chars_bytes = chars_end - chars_start;
auto const target_size = d_target.size_bytes();

// detect a target match at the specified byte position
device_span<char const> const d_chars_span(d_in_chars, chars_end);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/strings_column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ column_view strings_column_view::offsets() const

strings_column_view::offset_iterator strings_column_view::offsets_begin() const
{
return offsets().begin<size_type>() + offset();
return offsets().begin<int32_t>() + offset();
}

strings_column_view::offset_iterator strings_column_view::offsets_end() const
{
return offsets_begin() + size() + 1;
return offsets().begin<int32_t>() + offset() + size() + 1;
}

int64_t strings_column_view::chars_size(rmm::cuda_stream_view stream) const noexcept
Expand Down
15 changes: 0 additions & 15 deletions cpp/tests/strings/array_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,19 +213,4 @@ TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn)
cudf::test::expect_column_empty(results->view().column(0));
}

TEST_F(StringsColumnTest, OffsetsBeginEnd)
{
cudf::test::strings_column_wrapper input({"eee", "bb", "", "", "aa", "bbb", "ééé"},
{1, 1, 0, 1, 1, 1, 1});

cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 5});
auto scv = cudf::strings_column_view(input);
EXPECT_EQ(std::distance(scv.offsets_begin(), scv.offsets_end()),
static_cast<std::ptrdiff_t>(scv.size() + 1));

scv = cudf::strings_column_view(cudf::slice(input, {1, 5}).front());
EXPECT_EQ(std::distance(scv.offsets_begin(), scv.offsets_end()),
static_cast<std::ptrdiff_t>(scv.size() + 1));
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit ab20f47

Please sign in to comment.