From ab20f470090e7a6ebc4a3065feddff77b9e24f27 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 6 Mar 2024 09:11:41 -0500 Subject: [PATCH] Deprecate strings_column_view::offsets_begin() (#15205) Deprecates the `cudf::strings_column_view::offsets_begin()` and `cudf::strings_column_view::offsets_end()` since they are hardcoded to return `size_type*`. There are very few places that used these functions. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/15205 --- cpp/include/cudf/strings/strings_column_view.hpp | 8 ++++++-- cpp/src/strings/replace/multi.cu | 13 ++++++------- cpp/src/strings/replace/replace.cu | 8 ++++---- cpp/src/strings/strings_column_view.cpp | 4 ++-- cpp/tests/strings/array_tests.cpp | 15 --------------- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index 036589e17fe..1156f0a5b73 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -88,20 +88,24 @@ class strings_column_view : private column_view { /** * @brief Return an iterator for the offsets child column. * + * @deprecated Since 24.04 + * * This automatically applies the offset of the parent. * * @return Iterator pointing to the first offset value. */ - [[nodiscard]] offset_iterator offsets_begin() const; + [[deprecated]] offset_iterator offsets_begin() const; /** * @brief Return an end iterator for the offsets child column. * + * @deprecated Since 24.04 + * * This automatically applies the offset of the parent. * * @return Iterator pointing 1 past the last offset value. */ - [[nodiscard]] offset_iterator offsets_end() const; + [[deprecated]] offset_iterator offsets_end() const; /** * @brief Returns the number of bytes in the chars child column. diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index ffa922d5944..8b5a4317b50 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -302,17 +302,16 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in auto string_indices = rmm::device_uvector(target_count, stream); auto const pos_itr = cudf::detail::make_counting_transform_iterator( - 0, cuda::proclaim_return_type([d_positions] __device__(auto idx) -> size_type { + 0, cuda::proclaim_return_type([d_positions] __device__(auto idx) -> int64_t { return d_positions[idx].first; })); auto pos_count = std::distance(d_positions, copy_end); - thrust::upper_bound(rmm::exec_policy(stream), - input.offsets_begin(), - input.offsets_end(), - pos_itr, - pos_itr + pos_count, - string_indices.begin()); + auto begin = + cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); + auto end = begin + input.offsets().size(); + thrust::upper_bound( + rmm::exec_policy(stream), begin, end, pos_itr, pos_itr + pos_count, string_indices.begin()); // compute offsets per string auto targets_offsets = rmm::device_uvector(strings_count + 1, stream); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index c37c64e348c..1f752f543d0 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -413,10 +413,10 @@ std::unique_ptr replace_char_parallel(strings_column_view const& strings { auto const strings_count = strings.size(); auto const offset_count = strings_count + 1; - auto const d_offsets = strings.offsets_begin(); - auto const d_in_chars = strings.chars_begin(stream); - auto const chars_bytes = chars_end - chars_start; - auto const target_size = d_target.size_bytes(); + auto const d_offsets = strings.offsets().begin() + strings.offset(); // TODO: PR 14824 + auto const d_in_chars = strings.chars_begin(stream); + auto const chars_bytes = chars_end - chars_start; + auto const target_size = d_target.size_bytes(); // detect a target match at the specified byte position device_span const d_chars_span(d_in_chars, chars_end); diff --git a/cpp/src/strings/strings_column_view.cpp b/cpp/src/strings/strings_column_view.cpp index 83ae916afc3..3ae97a00bbf 100644 --- a/cpp/src/strings/strings_column_view.cpp +++ b/cpp/src/strings/strings_column_view.cpp @@ -37,12 +37,12 @@ column_view strings_column_view::offsets() const strings_column_view::offset_iterator strings_column_view::offsets_begin() const { - return offsets().begin() + offset(); + return offsets().begin() + offset(); } strings_column_view::offset_iterator strings_column_view::offsets_end() const { - return offsets_begin() + size() + 1; + return offsets().begin() + offset() + size() + 1; } int64_t strings_column_view::chars_size(rmm::cuda_stream_view stream) const noexcept diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index c6cc8e078bb..b22d7257041 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -213,19 +213,4 @@ TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn) cudf::test::expect_column_empty(results->view().column(0)); } -TEST_F(StringsColumnTest, OffsetsBeginEnd) -{ - cudf::test::strings_column_wrapper input({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {1, 1, 0, 1, 1, 1, 1}); - - cudf::test::fixed_width_column_wrapper expected({0, 5}); - auto scv = cudf::strings_column_view(input); - EXPECT_EQ(std::distance(scv.offsets_begin(), scv.offsets_end()), - static_cast(scv.size() + 1)); - - scv = cudf::strings_column_view(cudf::slice(input, {1, 5}).front()); - EXPECT_EQ(std::distance(scv.offsets_begin(), scv.offsets_end()), - static_cast(scv.size() + 1)); -} - CUDF_TEST_PROGRAM_MAIN()