Skip to content

Commit

Permalink
Fix cudf::strings::replace_multiple hang on empty target (#16167)
Browse files Browse the repository at this point in the history
Fixes logic in `cudf::strings::replace_multiple` to ignore empty targets correctly in the `replace_multi_fn` functor.
Also updated the doxygen and added a gtest for this case.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: #16167
  • Loading branch information
davidwendt authored Jul 5, 2024
1 parent c978181 commit a583c97
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ std::unique_ptr<column> replace_slice(
* If a target string is found, it is replaced by the corresponding entry in the repls column.
* All occurrences found in each string are replaced.
*
* This does not use regex to match targets in the string.
* This does not use regex to match targets in the string. Empty string targets are ignored.
*
* Null string entries will return null output string entries.
*
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,8 @@ struct replace_multi_fn {
while (spos < d_str.size_bytes()) {
for (int tgt_idx = 0; tgt_idx < d_targets.size(); ++tgt_idx) {
auto const d_tgt = d_targets.element<string_view>(tgt_idx);
if ((d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit
(d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match
if (!d_tgt.empty() && (d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit
(d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match
{
auto const d_repl = (d_repls.size() == 1) ? d_repls.element<string_view>(0)
: d_repls.element<string_view>(tgt_idx);
Expand All @@ -468,9 +468,8 @@ struct replace_multi_fn {
}
++spos;
}
if (out_ptr) // copy remainder
{
memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos);
if (out_ptr) {
memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); // copy remainder
} else {
d_sizes[idx] = bytes;
}
Expand Down
17 changes: 17 additions & 0 deletions cpp/tests/strings/replace_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,23 @@ TEST_F(StringsReplaceTest, ReplaceMultiLong)
}
}

TEST_F(StringsReplaceTest, EmptyTarget)
{
auto const input = cudf::test::strings_column_wrapper({"hello", "world", "", "accénted"});
auto const sv = cudf::strings_column_view(input);

auto const targets = cudf::test::strings_column_wrapper({"e", "", "d"});
auto const tv = cudf::strings_column_view(targets);

auto const repls = cudf::test::strings_column_wrapper({"E", "_", "D"});
auto const rv = cudf::strings_column_view(repls);

// empty target should be ignored
auto results = cudf::strings::replace_multiple(sv, tv, rv);
auto expected = cudf::test::strings_column_wrapper({"hEllo", "worlD", "", "accéntED"});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
}

TEST_F(StringsReplaceTest, EmptyStringsColumn)
{
auto const zero_size_strings_column = cudf::make_empty_column(cudf::type_id::STRING)->view();
Expand Down

0 comments on commit a583c97

Please sign in to comment.