From bdc1f3a6e1f383cd689ba8e92903b89e49cdb8d8 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 18 Sep 2023 19:34:29 -0400 Subject: [PATCH] Expose streams in public strings case APIs (#14056) Add stream parameter to public strings APIs: - `cudf::strings::capitalize()` - `cudf::strings::title()` - `cudf::strings::is_title()` - `cudf::strings::to_lower()` - `cudf::strings::to_upper()` - `cudf::strings::swapcase()` Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14056 --- cpp/include/cudf/strings/capitalize.hpp | 28 ++++++++----- cpp/include/cudf/strings/case.hpp | 8 +++- cpp/src/strings/capitalize.cu | 9 ++-- cpp/src/strings/case.cu | 9 ++-- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/strings/case_test.cpp | 55 +++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 cpp/tests/streams/strings/case_test.cpp diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp index 6d01ab047ba..57375e9ac6a 100644 --- a/cpp/include/cudf/strings/capitalize.hpp +++ b/cpp/include/cudf/strings/capitalize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,16 +50,18 @@ namespace strings { * * Any null string entries return corresponding null output column entries. * - * @throw cudf::logic_error if `delimiter.is_valid()` is `false`. + * @throw cudf::logic_error if `delimiters.is_valid()` is `false`. * - * @param input String column. - * @param delimiters Characters for identifying words to capitalize. 
+ * @param input String column + * @param delimiters Characters for identifying words to capitalize + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return Column of strings capitalized from the input column. + * @return Column of strings capitalized from the input column */ std::unique_ptr capitalize( strings_column_view const& input, - string_scalar const& delimiters = string_scalar(""), + string_scalar const& delimiters = string_scalar("", true, cudf::get_default_stream()), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -83,14 +85,16 @@ std::unique_ptr capitalize( * * Any null string entries return corresponding null output column entries. * - * @param input String column. - * @param sequence_type The character type that is used when identifying words. + * @param input String column + * @param sequence_type The character type that is used when identifying words + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return Column of titled strings. + * @return Column of titled strings */ std::unique_ptr title( strings_column_view const& input, string_character_types sequence_type = string_character_types::ALPHA, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -112,12 +116,14 @@ std::unique_ptr title( * * Any null string entries result in corresponding null output column entries. * - * @param input String column. 
+ * @param input String column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return Column of type BOOL8. + * @return Column of type BOOL8 */ std::unique_ptr is_title( strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp index 06ba4f8d882..94191686a92 100644 --- a/cpp/include/cudf/strings/case.hpp +++ b/cpp/include/cudf/strings/case.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,11 +38,13 @@ namespace strings { * Any null entries create null entries in the output column. * * @param strings Strings instance for this operation. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column of strings with characters converted. */ std::unique_ptr to_lower( strings_column_view const& strings, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -55,11 +57,13 @@ std::unique_ptr to_lower( * Any null entries create null entries in the output column. * * @param strings Strings instance for this operation. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column of strings with characters converted. 
*/ std::unique_ptr to_upper( strings_column_view const& strings, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -73,11 +77,13 @@ std::unique_ptr to_upper( * Any null entries create null entries in the output column. * * @param strings Strings instance for this operation. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column of strings with characters converted. */ std::unique_ptr swapcase( strings_column_view const& strings, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index 4e248922702..c555031b588 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -287,25 +287,28 @@ std::unique_ptr is_title(strings_column_view const& input, std::unique_ptr capitalize(strings_column_view const& input, string_scalar const& delimiter, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::capitalize(input, delimiter, cudf::get_default_stream(), mr); + return detail::capitalize(input, delimiter, stream, mr); } std::unique_ptr title(strings_column_view const& input, string_character_types sequence_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::title(input, sequence_type, cudf::get_default_stream(), mr); + return detail::title(input, sequence_type, stream, mr); } std::unique_ptr is_title(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_title(input, cudf::get_default_stream(), mr); + return 
detail::is_title(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index c5fe7a19f53..8f4c2ee574a 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -310,24 +310,27 @@ std::unique_ptr swapcase(strings_column_view const& strings, // APIs std::unique_ptr to_lower(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_lower(strings, cudf::get_default_stream(), mr); + return detail::to_lower(strings, stream, mr); } std::unique_ptr to_upper(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_upper(strings, cudf::get_default_stream(), mr); + return detail::to_upper(strings, stream, mr); } std::unique_ptr swapcase(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::swapcase(strings, cudf::get_default_stream(), mr); + return detail::swapcase(strings, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a69dc9bf2f8..4923ef5c903 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -627,6 +627,7 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_STRINGS_TEST streams/strings/case_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/strings/case_test.cpp 
b/cpp/tests/streams/strings/case_test.cpp new file mode 100644 index 00000000000..df3eabd773a --- /dev/null +++ b/cpp/tests/streams/strings/case_test.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +class StringsCaseTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsCaseTest, LowerUpper) +{ + auto const input = + cudf::test::strings_column_wrapper({"", + "The quick brown fox", + "jumps over the lazy dog.", + "all work and no play makes Jack a dull boy", + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); + auto view = cudf::strings_column_view(input); + + cudf::strings::to_lower(view, cudf::test::get_default_stream()); + cudf::strings::to_upper(view, cudf::test::get_default_stream()); + cudf::strings::swapcase(view, cudf::test::get_default_stream()); +} + +TEST_F(StringsCaseTest, Capitalize) +{ + auto const input = + cudf::test::strings_column_wrapper({"", + "The Quick Brown Fox", + "jumps over the lazy dog", + "all work and no play makes Jack a dull boy"}); + auto view = cudf::strings_column_view(input); + + auto const delimiter = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + cudf::strings::capitalize(view, delimiter, cudf::test::get_default_stream()); + cudf::strings::is_title(view, cudf::test::get_default_stream()); + cudf::strings::title( + view, cudf::strings::string_character_types::ALPHA, 
cudf::test::get_default_stream()); +}