Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate cudf::strings::slice_strings APIs that accept delimiters #13373

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions cpp/benchmarks/string/slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

class StringSlice : public cudf::benchmark {};

enum slice_type { position, multi_position, delimiter, multi_delimiter };
enum slice_type { position, multi_position };

static void BM_slice(benchmark::State& state, slice_type rt)
{
Expand All @@ -47,8 +47,6 @@ static void BM_slice(benchmark::State& state, slice_type rt)
auto stops_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 2);
cudf::test::fixed_width_column_wrapper<int32_t> starts(starts_itr, starts_itr + n_rows);
cudf::test::fixed_width_column_wrapper<int32_t> stops(stops_itr, stops_itr + n_rows);
auto delim_itr = thrust::constant_iterator<std::string>(" ");
cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows);

for (auto _ : state) {
cuda_event_timer raii(state, true, cudf::get_default_stream());
Expand All @@ -57,10 +55,6 @@ static void BM_slice(benchmark::State& state, slice_type rt)
cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2);
break;
case multi_position: cudf::strings::slice_strings(input, starts, stops); break;
case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break;
case multi_delimiter:
cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1);
break;
}
}

Expand Down Expand Up @@ -88,5 +82,3 @@ static void generate_bench_args(benchmark::internal::Benchmark* b)

STRINGS_BENCHMARK_DEFINE(position)
STRINGS_BENCHMARK_DEFINE(multi_position)
STRINGS_BENCHMARK_DEFINE(delimiter)
STRINGS_BENCHMARK_DEFINE(multi_delimiter)
8 changes: 6 additions & 2 deletions cpp/include/cudf/strings/slice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ std::unique_ptr<column> slice_strings(
/**
* @brief Slices a column of strings by using a delimiter as a slice point.
*
* @deprecated Since 23.06
*
* Returns a column of strings after searching for @p delimiter @p count number of
* times in the source @p strings from left to right if @p count is positive or from
* right to left if @p count is negative. If @p count is positive, it returns a substring
Expand Down Expand Up @@ -144,7 +146,7 @@ std::unique_ptr<column> slice_strings(
* @param mr Resource for allocating device memory.
* @return New strings column containing the substrings.
*/
std::unique_ptr<column> slice_strings(
[[deprecated]] std::unique_ptr<column> slice_strings(
strings_column_view const& strings,
string_scalar const& delimiter,
size_type count,
Expand All @@ -153,6 +155,8 @@ std::unique_ptr<column> slice_strings(
/**
* @brief Slices a column of strings by using a delimiter column as slice points.
*
* @deprecated Since 23.06
*
* Returns a column of strings after searching the delimiter defined per row from
* @p delimiter_strings @p count number of times in the source @p strings from left to right
* if @p count is positive or from right to left if @p count is negative. If @p count is
Expand Down Expand Up @@ -194,7 +198,7 @@ std::unique_ptr<column> slice_strings(
* @param mr Resource for allocating device memory.
* @return New strings column containing the substrings.
*/
std::unique_ptr<column> slice_strings(
[[deprecated]] std::unique_ptr<column> slice_strings(
strings_column_view const& strings,
strings_column_view const& delimiter_strings,
size_type count,
Expand Down
256 changes: 2 additions & 254 deletions cpp/tests/strings/slice_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,6 @@ TEST_F(StringsSliceTest, Error)
auto strings_view = cudf::strings_column_view(strings);
EXPECT_THROW(cudf::strings::slice_strings(strings_view, 0, 0, 0), cudf::logic_error);

auto delim_col = cudf::test::strings_column_wrapper({"", ""});
EXPECT_THROW(cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, -1),
cudf::logic_error);

auto indexes = cudf::test::fixed_width_column_wrapper<int32_t>({1, 2});
EXPECT_THROW(cudf::strings::slice_strings(strings_view, indexes, indexes), cudf::logic_error);

Expand All @@ -299,16 +295,10 @@ TEST_F(StringsSliceTest, ZeroSizeStringsColumn)
auto results = cudf::strings::slice_strings(strings_view, 1, 2);
cudf::test::expect_column_empty(results->view());

results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("foo"), 1);
cudf::test::expect_column_empty(results->view());

cudf::column_view starts_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
cudf::column_view stops_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
results = cudf::strings::slice_strings(strings_view, starts_column, stops_column);
cudf::test::expect_column_empty(results->view());

results = cudf::strings::slice_strings(strings_view, strings_view, 1);
cudf::test::expect_column_empty(results->view());
}

TEST_F(StringsSliceTest, AllEmpty)
Expand All @@ -317,250 +307,8 @@ TEST_F(StringsSliceTest, AllEmpty)
auto strings_view = cudf::strings_column_view(strings_col);
auto exp_results = cudf::column_view(strings_col);

auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("e"), -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
results = cudf::strings::slice_strings(strings_view, strings_view, -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

TEST_F(StringsSliceTest, EmptyDelimiter)
{
auto strings_col = cudf::test::strings_column_wrapper(
{"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
;
auto strings_view = cudf::strings_column_view(strings_col);

auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
{true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar(""), 1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);

auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
{true, false, true, false, true, false});

results = cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, 1);
auto results = cudf::strings::slice_strings(strings_view, 0, -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

TEST_F(StringsSliceTest, ZeroCount)
{
auto strings_col = cudf::test::strings_column_wrapper(
{"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
;
auto strings_view = cudf::strings_column_view(strings_col);

auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
{true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 0);
results = cudf::strings::slice_strings(strings_view, 0, -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);

auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
{true, false, true, false, true, false});

results = cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, 0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

TEST_F(StringsSliceTest, SearchScalarDelimiter)
{
auto strings_col = cudf::test::strings_column_wrapper(
{"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
;
auto strings_view = cudf::strings_column_view(strings_col);

{
auto exp_results = cudf::test::strings_column_wrapper({"H", "thes", "", "lease", "t", ""},
{true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto exp_results = cudf::test::strings_column_wrapper(
{"llo", "", "", "lease", "st strings", ""}, {true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 2);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings_col);
}

{
auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -2);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings_col);
}

{
auto col0 = cudf::test::strings_column_wrapper(
{"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper({"Hello LL", "o", "", "opp", "pol", ""},
{true, true, false, true, true, true});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), 2);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper(
{"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper({"ogh", "pppllo", "", "llo", " po", ""},
{true, true, false, true, true, true});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), -2);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper(
{"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper(
{"Héllo HélloHéllo", "Hélloééééé", "", "éééé", "poloéé lopéé apploo", ""},
{true, true, false, true, true, true});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), 3);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper(
{"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper(
{"Héllo HélloHéllo", "Hélloééééé", "", "éééé", " lopéé applooéé po", ""},
{true, true, false, true, true, true});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), -3);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
"www.apache..org",
"tennis...com",
"nvidia....com",
"google...........com",
"microsoft...c.....co..m"});

auto exp_results = cudf::test::strings_column_wrapper(
{"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("."), 3);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
"www.apache..org",
"tennis..com",
"nvidia....com",
"google...........com",
".",
"microsoft...c.....co..m"});

auto exp_results = cudf::test::strings_column_wrapper(
{"www.yahoo.com", "www.apache..org", "tennis..com", "..com", "..com", ".", "co..m"});

auto results =
cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar(".."), -2);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}
}

TEST_F(StringsSliceTest, SearchColumnDelimiter)
{
{
auto col0 = cudf::test::strings_column_wrapper(
{"H™élloi ™◎oo™ff™", "thesé", "", "lease™", "tést strings", "™"},
{true, true, false, true, true, true});
auto delim_col = cudf::test::strings_column_wrapper({"™", "™", "", "e", "t", "™"});

auto exp_results = cudf::test::strings_column_wrapper({"H", "thesé", "", "l", "", ""},
{true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(
cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi ™◎ooff™ff™",
"tffffhffesé",
"",
"lff fooff ffff eaffse™",
"tést ffstri.nffgs",
"ffff ™ ffff ff"},
{true, true, false, true, true, true});
auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "ff ", "t", "ff ™"});

auto exp_results = cudf::test::strings_column_wrapper(
{"ff™", "esé", "", "eaffse™", "ri.nffgs", " ffff ff"}, {true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(
cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ gooff™ ™◎ooff™ff™",
"tffffhffesé",
"",
"lff fooff ffff eaffse™",
"tést ff™ffff™ff™ffffstri.ff™ffff™nffgs",
"ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"},
{true, true, false, true, true, true});
auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "e ", "ff™ff", "ff™ff™"},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ goo",
"tffffh",
"",
"lff fooff ffff eaffse™",
"tést ff™ffff™ff™ffffstri.",
"ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"},
{true, true, false, true, true, true});

auto results = cudf::strings::slice_strings(
cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 3);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}

{
auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ gooff™ ™◎ooff™ff™",
"tffffhffesé",
"",
"lff fooff ffff eaffse™",
"tést ff™ffff™ff™ffffstri.ff™ffff™nffgs",
"ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"});
auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "e ", "ff™ff", "ff™ff™"},
{true, true, false, true, true, true});

auto exp_results = cudf::test::strings_column_wrapper({" gooff™ ™◎ooff™ff™",
"ffhffesé",
"",
"lff fooff ffff eaffse™",
"ff™ff™ffffstri.ff™ffff™nffgs",
"ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"});

auto results = cudf::strings::slice_strings(
cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -3);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
}
}