From 097d6f2e444c40ec7b1172dd9158a0ca24b5b1df Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 21 Apr 2023 20:11:14 -0400 Subject: [PATCH] Fix slice_strings to return empty strings for stop < start indices (#13178) Fixes a bug in `cudf::strings::slice_strings` where a `stop` value less than the `start` value did not result in an empty string. The optimization in #13057 introduced this bug. An additional gtest was added to check for this condition. Close #13173 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/13178 --- cpp/src/strings/slice.cu | 2 +- cpp/tests/strings/slice_tests.cpp | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index fbe54c01bba..a3182577c34 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -59,7 +59,7 @@ struct substring_from_fn { auto const stop = stops[idx]; auto const end = (((stop < 0) || (stop > length)) ? length : stop); - return d_str.substr(start, end - start); + return start < end ? 
d_str.substr(start, end - start) : string_view{}; } substring_from_fn(column_device_view const& d_column, IndexIterator starts, IndexIterator stops) diff --git a/cpp/tests/strings/slice_tests.cpp b/cpp/tests/strings/slice_tests.cpp index 8e0c3b325b2..ca73e1791d6 100644 --- a/cpp/tests/strings/slice_tests.cpp +++ b/cpp/tests/strings/slice_tests.cpp @@ -93,6 +93,24 @@ TEST_P(Parameters, Substring_From) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } +TEST_P(Parameters, SubstringStopZero) +{ + cudf::size_type start = GetParam(); + cudf::test::strings_column_wrapper input({"abc", "défgh", "", "XYZ"}); + auto view = cudf::strings_column_view(input); + + auto results = cudf::strings::slice_strings(view, start, 0); + cudf::test::strings_column_wrapper expected({"", "", "", ""}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + + auto starts = + cudf::test::fixed_width_column_wrapper({start, start, start, start}); + auto stops = cudf::test::fixed_width_column_wrapper({0, 0, 0, 0}); + + results = cudf::strings::slice_strings(view, starts, stops); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} + TEST_P(Parameters, AllEmpty) { std::vector h_strings{"", "", "", ""};