From 7dde02dee8fc3624fd3eae2a0594b93baa880725 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 17 May 2023 15:52:50 -0400 Subject: [PATCH] Deprecate cudf::strings::slice_strings that accept delimiters --- cpp/benchmarks/string/slice.cpp | 10 +- cpp/include/cudf/strings/slice.hpp | 8 +- cpp/tests/strings/slice_tests.cpp | 256 +---------------------------- 3 files changed, 9 insertions(+), 265 deletions(-) diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index e0b801ea0a7..6c1d7d98d3a 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -33,7 +33,7 @@ class StringSlice : public cudf::benchmark {}; -enum slice_type { position, multi_position, delimiter, multi_delimiter }; +enum slice_type { position, multi_position }; static void BM_slice(benchmark::State& state, slice_type rt) { @@ -47,8 +47,6 @@ static void BM_slice(benchmark::State& state, slice_type rt) auto stops_itr = thrust::constant_iterator(max_str_length / 2); cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); - auto delim_itr = thrust::constant_iterator(" "); - cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows); for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); @@ -57,10 +55,6 @@ static void BM_slice(benchmark::State& state, slice_type rt) cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2); break; case multi_position: cudf::strings::slice_strings(input, starts, stops); break; - case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break; - case multi_delimiter: - cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1); - break; } } @@ -88,5 +82,3 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) STRINGS_BENCHMARK_DEFINE(position) STRINGS_BENCHMARK_DEFINE(multi_position) -STRINGS_BENCHMARK_DEFINE(delimiter) -STRINGS_BENCHMARK_DEFINE(multi_delimiter) diff --git a/cpp/include/cudf/strings/slice.hpp b/cpp/include/cudf/strings/slice.hpp index e28d42b8154..abb9bc9dda5 100644 --- a/cpp/include/cudf/strings/slice.hpp +++ b/cpp/include/cudf/strings/slice.hpp @@ -110,6 +110,8 @@ std::unique_ptr slice_strings( /** * @brief Slices a column of strings by using a delimiter as a slice point. * + * @deprecated Since 23.06 + * * Returns a column of strings after searching for @p delimiter @p count number of * times in the source @p strings from left to right if @p count is positive or from * right to left if @p count is negative. If @p count is positive, it returns a substring @@ -144,7 +146,7 @@ std::unique_ptr slice_strings( * @param mr Resource for allocating device memory. * @return New strings column containing the substrings. */ -std::unique_ptr slice_strings( +[[deprecated]] std::unique_ptr slice_strings( strings_column_view const& strings, string_scalar const& delimiter, size_type count, @@ -153,6 +155,8 @@ std::unique_ptr slice_strings( /** * @brief Slices a column of strings by using a delimiter column as slice points. * + * @deprecated Since 23.06 + * * Returns a column of strings after searching the delimiter defined per row from * @p delimiter_strings @p count number of times in the source @p strings from left to right * if @p count is positive or from right to left if @p count is negative. If @p count is @@ -194,7 +198,7 @@ std::unique_ptr slice_strings( * @param mr Resource for allocating device memory. * @return New strings column containing the substrings. */ -std::unique_ptr slice_strings( +[[deprecated]] std::unique_ptr slice_strings( strings_column_view const& strings, strings_column_view const& delimiter_strings, size_type count, diff --git a/cpp/tests/strings/slice_tests.cpp b/cpp/tests/strings/slice_tests.cpp index ca73e1791d6..0797470dbff 100644 --- a/cpp/tests/strings/slice_tests.cpp +++ b/cpp/tests/strings/slice_tests.cpp @@ -274,10 +274,6 @@ TEST_F(StringsSliceTest, Error) auto strings_view = cudf::strings_column_view(strings); EXPECT_THROW(cudf::strings::slice_strings(strings_view, 0, 0, 0), cudf::logic_error); - auto delim_col = cudf::test::strings_column_wrapper({"", ""}); - EXPECT_THROW(cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, -1), - cudf::logic_error); - auto indexes = cudf::test::fixed_width_column_wrapper({1, 2}); EXPECT_THROW(cudf::strings::slice_strings(strings_view, indexes, indexes), cudf::logic_error); @@ -299,16 +295,10 @@ TEST_F(StringsSliceTest, ZeroSizeStringsColumn) auto results = cudf::strings::slice_strings(strings_view, 1, 2); cudf::test::expect_column_empty(results->view()); - results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("foo"), 1); - cudf::test::expect_column_empty(results->view()); - cudf::column_view starts_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); cudf::column_view stops_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); results = cudf::strings::slice_strings(strings_view, starts_column, stops_column); cudf::test::expect_column_empty(results->view()); - - results = cudf::strings::slice_strings(strings_view, strings_view, 1); - cudf::test::expect_column_empty(results->view()); } TEST_F(StringsSliceTest, AllEmpty) @@ -317,250 +307,8 @@ TEST_F(StringsSliceTest, AllEmpty) auto strings_view = cudf::strings_column_view(strings_col); auto exp_results = cudf::column_view(strings_col); - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("e"), -1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - results = cudf::strings::slice_strings(strings_view, strings_view, -1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); -} - -TEST_F(StringsSliceTest, EmptyDelimiter) -{ - auto strings_col = cudf::test::strings_column_wrapper( - {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true}); - ; - auto strings_view = cudf::strings_column_view(strings_col); - - auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""}, - {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar(""), 1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - - auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""}, - {true, false, true, false, true, false}); - - results = cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, 1); + auto results = cudf::strings::slice_strings(strings_view, 0, -1); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); -} - -TEST_F(StringsSliceTest, ZeroCount) -{ - auto strings_col = cudf::test::strings_column_wrapper( - {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true}); - ; - auto strings_view = cudf::strings_column_view(strings_col); - - auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""}, - {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 0); + results = cudf::strings::slice_strings(strings_view, 0, -1); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - - auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""}, - {true, false, true, false, true, false}); - - results = cudf::strings::slice_strings(strings_view, cudf::strings_column_view{delim_col}, 0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); -} - -TEST_F(StringsSliceTest, SearchScalarDelimiter) -{ - auto strings_col = cudf::test::strings_column_wrapper( - {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true}); - ; - auto strings_view = cudf::strings_column_view(strings_col); - - { - auto exp_results = cudf::test::strings_column_wrapper({"H", "thes", "", "lease", "t", ""}, - {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto exp_results = cudf::test::strings_column_wrapper( - {"llo", "", "", "lease", "st strings", ""}, {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings_col); - } - - { - auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings_col); - } - - { - auto col0 = cudf::test::strings_column_wrapper( - {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper({"Hello LL", "o", "", "opp", "pol", ""}, - {true, true, false, true, true, true}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), 2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper( - {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper({"ogh", "pppllo", "", "llo", " po", ""}, - {true, true, false, true, true, true}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), -2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper( - {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper( - {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", "poloéé lopéé apploo", ""}, - {true, true, false, true, true, true}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), 3); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper( - {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper( - {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", " lopéé applooéé po", ""}, - {true, true, false, true, true, true}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), -3); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com", - "www.apache..org", - "tennis...com", - "nvidia....com", - "google...........com", - "microsoft...c.....co..m"}); - - auto exp_results = cudf::test::strings_column_wrapper( - {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("."), 3); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com", - "www.apache..org", - "tennis..com", - "nvidia....com", - "google...........com", - ".", - "microsoft...c.....co..m"}); - - auto exp_results = cudf::test::strings_column_wrapper( - {"www.yahoo.com", "www.apache..org", "tennis..com", "..com", "..com", ".", "co..m"}); - - auto results = - cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar(".."), -2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } -} - -TEST_F(StringsSliceTest, SearchColumnDelimiter) -{ - { - auto col0 = cudf::test::strings_column_wrapper( - {"H™élloi ™◎oo™ff™", "thesé", "", "lease™", "tést strings", "™"}, - {true, true, false, true, true, true}); - auto delim_col = cudf::test::strings_column_wrapper({"™", "™", "", "e", "t", "™"}); - - auto exp_results = cudf::test::strings_column_wrapper({"H", "thesé", "", "l", "", ""}, - {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings( - cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi ™◎ooff™ff™", - "tffffhffesé", - "", - "lff fooff ffff eaffse™", - "tést ffstri.nffgs", - "ffff ™ ffff ff"}, - {true, true, false, true, true, true}); - auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "ff ", "t", "ff ™"}); - - auto exp_results = cudf::test::strings_column_wrapper( - {"ff™", "esé", "", "eaffse™", "ri.nffgs", " ffff ff"}, {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings( - cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ gooff™ ™◎ooff™ff™", - "tffffhffesé", - "", - "lff fooff ffff eaffse™", - "tést ff™ffff™ff™ffffstri.ff™ffff™nffgs", - "ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"}, - {true, true, false, true, true, true}); - auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "e ", "ff™ff", "ff™ff™"}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ goo", - "tffffh", - "", - "lff fooff ffff eaffse™", - "tést ff™ffff™ff™ffffstri.", - "ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"}, - {true, true, false, true, true, true}); - - auto results = cudf::strings::slice_strings( - cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 3); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } - - { - auto col0 = cudf::test::strings_column_wrapper({"H™élloff ffffi fooff™ barff™ gooff™ ™◎ooff™ff™", - "tffffhffesé", - "", - "lff fooff ffff eaffse™", - "tést ff™ffff™ff™ffffstri.ff™ffff™nffgs", - "ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"}); - auto delim_col = cudf::test::strings_column_wrapper({"ff™", "ff", "", "e ", "ff™ff", "ff™ff™"}, - {true, true, false, true, true, true}); - - auto exp_results = cudf::test::strings_column_wrapper({" gooff™ ™◎ooff™ff™", - "ffhffesé", - "", - "lff fooff ffff eaffse™", - "ff™ff™ffffstri.ff™ffff™nffgs", - "ffff ™ ffff ff™ ff™ff™ff™ ff™ff™ ff"}); - - auto results = cudf::strings::slice_strings( - cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -3); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); - } }