Skip to content

Commit

Permalink
Expose streams in public strings case APIs (#14056)
Browse files Browse the repository at this point in the history
Add stream parameter to public strings APIs:
- `cudf::strings::capitalize()` 
- `cudf::strings::title()`
- `cudf::strings::is_title()`
- `cudf::strings::to_lower()`
- `cudf::strings::to_upper()`
- `cudf::strings::swapcase()`

Reference #13744

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14056
  • Loading branch information
davidwendt authored Sep 18, 2023
1 parent 2acd3df commit bdc1f3a
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 18 deletions.
28 changes: 17 additions & 11 deletions cpp/include/cudf/strings/capitalize.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -50,16 +50,18 @@ namespace strings {
*
* Any null string entries return corresponding null output column entries.
*
* @throw cudf::logic_error if `delimiter.is_valid()` is `false`.
* @throw cudf::logic_error if `delimiters.is_valid()` is `false`.
*
* @param input String column.
* @param delimiters Characters for identifying words to capitalize.
* @param input String column
* @param delimiters Characters for identifying words to capitalize
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Column of strings capitalized from the input column.
* @return Column of strings capitalized from the input column
*/
std::unique_ptr<column> capitalize(
strings_column_view const& input,
string_scalar const& delimiters = string_scalar(""),
string_scalar const& delimiters = string_scalar("", true, cudf::get_default_stream()),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -83,14 +85,16 @@ std::unique_ptr<column> capitalize(
*
* Any null string entries return corresponding null output column entries.
*
* @param input String column.
* @param sequence_type The character type that is used when identifying words.
* @param input String column
* @param sequence_type The character type that is used when identifying words
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Column of titled strings.
* @return Column of titled strings
*/
std::unique_ptr<column> title(
strings_column_view const& input,
string_character_types sequence_type = string_character_types::ALPHA,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -112,12 +116,14 @@ std::unique_ptr<column> title(
*
* Any null string entries result in corresponding null output column entries.
*
* @param input String column.
* @param input String column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Column of type BOOL8.
* @return Column of type BOOL8
*/
std::unique_ptr<column> is_title(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
8 changes: 7 additions & 1 deletion cpp/include/cudf/strings/case.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,11 +38,13 @@ namespace strings {
* Any null entries create null entries in the output column.
*
* @param strings Strings instance for this operation.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of strings with characters converted.
*/
std::unique_ptr<column> to_lower(
strings_column_view const& strings,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -55,11 +57,13 @@ std::unique_ptr<column> to_lower(
* Any null entries create null entries in the output column.
*
* @param strings Strings instance for this operation.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of strings with characters converted.
*/
std::unique_ptr<column> to_upper(
strings_column_view const& strings,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -73,11 +77,13 @@ std::unique_ptr<column> to_upper(
* Any null entries create null entries in the output column.
*
* @param strings Strings instance for this operation.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of strings with characters converted.
*/
std::unique_ptr<column> swapcase(
strings_column_view const& strings,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/strings/capitalize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -287,25 +287,28 @@ std::unique_ptr<column> is_title(strings_column_view const& input,

// Public cudf::strings::capitalize wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::capitalize, passing `input`, `delimiter` and `mr` through unchanged.
std::unique_ptr<column> capitalize(strings_column_view const& input,
string_scalar const& delimiter,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::capitalize(input, delimiter, cudf::get_default_stream(), mr);
return detail::capitalize(input, delimiter, stream, mr);
}

// Public cudf::strings::title wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::title, passing `input`, `sequence_type` and `mr` through unchanged.
std::unique_ptr<column> title(strings_column_view const& input,
string_character_types sequence_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::title(input, sequence_type, cudf::get_default_stream(), mr);
return detail::title(input, sequence_type, stream, mr);
}

// Public cudf::strings::is_title wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::is_title, passing `input` and `mr` through unchanged.
std::unique_ptr<column> is_title(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::is_title(input, cudf::get_default_stream(), mr);
return detail::is_title(input, stream, mr);
}

} // namespace strings
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/strings/case.cu
Original file line number Diff line number Diff line change
Expand Up @@ -310,24 +310,27 @@ std::unique_ptr<column> swapcase(strings_column_view const& strings,
// APIs

// Public cudf::strings::to_lower wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::to_lower, passing `strings` and `mr` through unchanged.
std::unique_ptr<column> to_lower(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::to_lower(strings, cudf::get_default_stream(), mr);
return detail::to_lower(strings, stream, mr);
}

// Public cudf::strings::to_upper wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::to_upper, passing `strings` and `mr` through unchanged.
std::unique_ptr<column> to_upper(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::to_upper(strings, cudf::get_default_stream(), mr);
return detail::to_upper(strings, stream, mr);
}

// Public cudf::strings::swapcase wrapper: invokes CUDF_FUNC_RANGE() and delegates to
// detail::swapcase, passing `strings` and `mr` through unchanged.
std::unique_ptr<column> swapcase(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// NOTE(review): this is a rendered diff hunk. The first return is the removed line (it
// always used cudf::get_default_stream()); the second is the added line that forwards the
// caller's `stream`. Read literally, the second return would be unreachable.
return detail::swapcase(strings, cudf::get_default_stream(), mr);
return detail::swapcase(strings, stream, mr);
}

} // namespace strings
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_STRINGS_TEST streams/strings/case_test.cpp STREAM_MODE testing)

# ##################################################################################################
# Install tests ####################################################################################
Expand Down
55 changes: 55 additions & 0 deletions cpp/tests/streams/strings/case_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/strings/capitalize.hpp>
#include <cudf/strings/case.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

// Empty fixture used by the TEST_F cases in this file; adds nothing to BaseFixture.
struct StringsCaseTest : cudf::test::BaseFixture {};

// Calls to_lower, to_upper and swapcase on a small strings column, passing
// cudf::test::get_default_stream() explicitly. The returned columns are discarded;
// the test only exercises the calls.
TEST_F(StringsCaseTest, LowerUpper)
{
  cudf::test::strings_column_wrapper const input(
    {"",
     "The quick brown fox",
     "jumps over the lazy dog.",
     "all work and no play makes Jack a dull boy",
     R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"});
  cudf::strings_column_view const strings_view(input);

  // Fetch the stream once and reuse it for every call below.
  auto const test_stream = cudf::test::get_default_stream();

  cudf::strings::to_lower(strings_view, test_stream);
  cudf::strings::to_upper(strings_view, test_stream);
  cudf::strings::swapcase(strings_view, test_stream);
}

// Calls capitalize, is_title and title on a small strings column, passing
// cudf::test::get_default_stream() explicitly (including to the delimiter scalar).
// The returned columns are discarded; the test only exercises the calls.
TEST_F(StringsCaseTest, Capitalize)
{
  cudf::test::strings_column_wrapper const input(
    {"",
     "The Quick Brown Fox",
     "jumps over the lazy dog",
     "all work and no play makes Jack a dull boy"});
  cudf::strings_column_view const strings_view(input);

  // Fetch the stream once and reuse it for the scalar and every call below.
  auto const test_stream = cudf::test::get_default_stream();

  // Single-space delimiter, constructed as a valid scalar on the same stream.
  cudf::string_scalar const space_delimiter(" ", true, test_stream);

  cudf::strings::capitalize(strings_view, space_delimiter, test_stream);
  cudf::strings::is_title(strings_view, test_stream);

  auto const word_chars = cudf::strings::string_character_types::ALPHA;
  cudf::strings::title(strings_view, word_chars, test_stream);
}

0 comments on commit bdc1f3a

Please sign in to comment.