Skip to content

Commit

Permalink
Fix off-by-one error in char-parallel string scalar replace (#7502)
Browse files Browse the repository at this point in the history
Fixes #7500.

The filter for target replacement positions crossing a string boundary had an off-by-one error.  Target strings that were at the end of a string were incorrectly flagged as crossing a boundary.

Authors:
  - Jason Lowe (@jlowe)

Approvers:
  - David (@davidwendt)
  - Paul Taylor (@trxcllnt)

URL: #7502
  • Loading branch information
jlowe authored Mar 4, 2021
1 parent 7871e7a commit 72438d8
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
2 changes: 1 addition & 1 deletion cpp/src/strings/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ size_type filter_false_target_positions(rmm::device_uvector<size_type>& target_p
// find the end of the string containing the start of this target
size_type const* offset_ptr = thrust::upper_bound(
thrust::seq, d_offsets_span.begin(), d_offsets_span.end(), target_pos);
return target_pos + target_size >= *offset_ptr;
return target_pos + target_size > *offset_ptr;
});
auto const target_count = cudf::distance(d_target_positions, target_pos_end);
if (target_count == 0) { return 0; }
Expand Down
31 changes: 31 additions & 0 deletions cpp/tests/strings/replace_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,37 @@ TEST_F(StringsReplaceTest, ReplaceNullInput)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
}

TEST_F(StringsReplaceTest, ReplaceEndOfString)
{
auto strings = build_corpus();
auto strings_view = cudf::strings_column_view(strings);
// replace all occurrences of 'in' with ' '
std::vector<const char*> h_expected{"the quick brown fox jumps over the lazy dog",
"the fat cat lays next to the other accénted cat",
"a slow mov g turtlé cannot catch the bird",
"which can be composéd together to form a more complete",
"The result does not clude the value the sum ",
"",
nullptr};

cudf::test::strings_column_wrapper expected(
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));

auto results =
cudf::strings::replace(strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
cudf::test::expect_columns_equal(*results, expected);

results = cudf::strings::detail::replace<cudf::strings::detail::replace_algorithm::CHAR_PARALLEL>(
strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
cudf::test::expect_columns_equal(*results, expected);

results = cudf::strings::detail::replace<cudf::strings::detail::replace_algorithm::ROW_PARALLEL>(
strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
cudf::test::expect_columns_equal(*results, expected);
}

TEST_F(StringsReplaceTest, ReplaceSlice)
{
std::vector<const char*> h_strings{"Héllo", "thesé", nullptr, "ARE THE", "tést strings", ""};
Expand Down

0 comments on commit 72438d8

Please sign in to comment.