From 72438d8a480f9158d0c6a1e5d8db119bb1550200 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 4 Mar 2021 06:40:45 -0600 Subject: [PATCH] Fix off-by-one error in char-parallel string scalar replace (#7502) Fixes #7500. The filter for target replacement positions crossing a string boundary had an off-by-one error. Target strings that were at the end of a string were incorrectly flagged as crossing a boundary. Authors: - Jason Lowe (@jlowe) Approvers: - David (@davidwendt) - Paul Taylor (@trxcllnt) URL: https://github.com/rapidsai/cudf/pull/7502 --- cpp/src/strings/replace/replace.cu | 2 +- cpp/tests/strings/replace_tests.cpp | 31 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 0955c217526..ea474644d06 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -315,7 +315,7 @@ size_type filter_false_target_positions(rmm::device_uvector& target_p // find the end of the string containing the start of this target size_type const* offset_ptr = thrust::upper_bound( thrust::seq, d_offsets_span.begin(), d_offsets_span.end(), target_pos); - return target_pos + target_size >= *offset_ptr; + return target_pos + target_size > *offset_ptr; }); auto const target_count = cudf::distance(d_target_positions, target_pos_end); if (target_count == 0) { return 0; } diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp index 98175674a3a..359b31177bf 100644 --- a/cpp/tests/strings/replace_tests.cpp +++ b/cpp/tests/strings/replace_tests.cpp @@ -200,6 +200,37 @@ TEST_F(StringsReplaceTest, ReplaceNullInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings); } +TEST_F(StringsReplaceTest, ReplaceEndOfString) +{ + auto strings = build_corpus(); + auto strings_view = cudf::strings_column_view(strings); + // replace all occurrences of 'in' with ' ' + std::vector h_expected{"the quick brown fox jumps over the lazy dog", + "the fat cat lays next to the other accénted cat", + "a slow mov g turtlé cannot catch the bird", + "which can be composéd together to form a more complete", + "The result does not clude the value the sum ", + "", + nullptr}; + + cudf::test::strings_column_wrapper expected( + h_expected.begin(), + h_expected.end(), + thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); + + auto results = + cudf::strings::replace(strings_view, cudf::string_scalar("in"), cudf::string_scalar(" ")); + cudf::test::expect_columns_equal(*results, expected); + + results = cudf::strings::detail::replace( + strings_view, cudf::string_scalar("in"), cudf::string_scalar(" ")); + cudf::test::expect_columns_equal(*results, expected); + + results = cudf::strings::detail::replace( + strings_view, cudf::string_scalar("in"), cudf::string_scalar(" ")); + cudf::test::expect_columns_equal(*results, expected); +} + TEST_F(StringsReplaceTest, ReplaceSlice) { std::vector h_strings{"Héllo", "thesé", nullptr, "ARE THE", "tést strings", ""};