Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stream parameter to public cudf::strings::split APIs #13997

Merged
merged 4 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 32 additions & 24 deletions cpp/include/cudf/strings/split/split.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,18 +43,20 @@ namespace strings {
*
* Any null string entries return corresponding null output columns.
*
* @param strings_column Strings instance for this operation.
* @param delimiter UTF-8 encoded string indicating the split points in each string.
* @param strings_column Strings instance for this operation
* @param delimiter UTF-8 encoded string indicating the split points in each string;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a huge fan of the semicolon changes, but don't want to spend too much effort on that discussion.

* Default of empty string indicates split on whitespace.
* @param maxsplit Maximum number of splits to perform.
* @param maxsplit Maximum number of splits to perform;
* Default of -1 indicates all possible splits on each string.
* @param mr Device memory resource used to allocate the returned table's device memory.
* @return New table of strings columns.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
* @return New table of strings columns
*/
std::unique_ptr<table> split(
strings_column_view const& strings_column,
string_scalar const& delimiter = string_scalar(""),  // empty string => split on whitespace
size_type maxsplit = -1,  // -1 => all possible splits on each string
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // stream for device memory operations and kernel launches
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());  // allocates the returned table's device memory

/**
Expand All @@ -71,18 +73,20 @@ std::unique_ptr<table> split(
*
* Any null string entries return corresponding null output columns.
*
* @param strings_column Strings instance for this operation.
* @param delimiter UTF-8 encoded string indicating the split points in each string.
* @param strings_column Strings instance for this operation
* @param delimiter UTF-8 encoded string indicating the split points in each string;
* Default of empty string indicates split on whitespace.
* @param maxsplit Maximum number of splits to perform.
* @param maxsplit Maximum number of splits to perform;
* Default of -1 indicates all possible splits on each string.
* @param mr Device memory resource used to allocate the returned table's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
* @return New table of strings columns
*/
std::unique_ptr<table> rsplit(
strings_column_view const& strings_column,
string_scalar const& delimiter = string_scalar(""),  // empty string => split on whitespace
size_type maxsplit = -1,  // -1 => all possible splits on each string
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // stream for device memory operations and kernel launches
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());  // allocates the returned table's device memory

/**
Expand Down Expand Up @@ -141,20 +145,22 @@ std::unique_ptr<table> rsplit(
*
* @throw cudf::logic_error if `delimiter` is invalid.
*
* @param strings A column of string elements to be split.
* @param delimiter The string to identify split points in each string.
* @param strings A column of string elements to be split
* @param delimiter The string to identify split points in each string;
* Default of empty string indicates split on whitespace.
* @param maxsplit Maximum number of splits to perform.
* Default of -1 indicates all possible splits on each string.
* @param mr Device memory resource used to allocate the returned result's device memory.
* @return Lists column of strings
* Each vector of the lists column holds splits from a single row
* @param maxsplit Maximum number of splits to perform;
* Default of -1 indicates all possible splits on each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned result's device memory
* @return Lists column of strings;
* Each row of the lists column holds splits from a single row
* element of the input column.
*/
std::unique_ptr<column> split_record(
strings_column_view const& strings,
string_scalar const& delimiter = string_scalar(""),  // empty string => split on whitespace
size_type maxsplit = -1,  // -1 => all possible splits on each string
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // stream for device memory operations and kernel launches
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());  // allocates the returned lists column's device memory

/**
Expand Down Expand Up @@ -218,20 +224,22 @@ std::unique_ptr<column> split_record(
*
* @throw cudf::logic_error if `delimiter` is invalid.
*
* @param strings A column of string elements to be split.
* @param delimiter The string to identify split points in each string.
* @param strings A column of string elements to be split
* @param delimiter The string to identify split points in each string;
* Default of empty string indicates split on whitespace.
* @param maxsplit Maximum number of splits to perform.
* Default of -1 indicates all possible splits on each string.
* @param mr Device memory resource used to allocate the returned result's device memory.
* @return Lists column of strings
* Each vector of the lists column holds splits from a single row
* @param maxsplit Maximum number of splits to perform;
* Default of -1 indicates all possible splits on each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned result's device memory
* @return Lists column of strings;
* Each row of the lists column holds splits from a single row
* element of the input column.
*/
std::unique_ptr<column> rsplit_record(
strings_column_view const& strings,
string_scalar const& delimiter = string_scalar(""),  // empty string => split on whitespace
size_type maxsplit = -1,  // -1 => all possible splits on each string
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // stream for device memory operations and kernel launches
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());  // allocates the returned lists column's device memory

/** @} */ // end of doxygen group
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/strings/split/split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -431,19 +431,21 @@ std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
// Public entry point for cudf::strings::split.
//
// Thin wrapper that forwards every argument, including the caller-supplied
// `stream`, to detail::split and returns its result unchanged.
//
// @param strings_column Strings instance for this operation
// @param delimiter      UTF-8 encoded string indicating the split points in each string
// @param maxsplit       Maximum number of splits to perform
// @param stream         CUDA stream used for device memory operations and kernel launches
// @param mr             Device memory resource used to allocate the returned table's device memory
// @return New table of strings columns
std::unique_ptr<table> split(strings_column_view const& strings_column,
                             string_scalar const& delimiter,
                             size_type maxsplit,
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  // Use the stream passed by the caller; substituting cudf::get_default_stream()
  // here would silently ignore the `stream` argument.
  return detail::split(strings_column, delimiter, maxsplit, stream, mr);
}

// Public entry point for cudf::strings::rsplit.
//
// Thin wrapper that forwards every argument, including the caller-supplied
// `stream`, to detail::rsplit and returns its result unchanged.
//
// @param strings_column Strings instance for this operation
// @param delimiter      UTF-8 encoded string indicating the split points in each string
// @param maxsplit       Maximum number of splits to perform
// @param stream         CUDA stream used for device memory operations and kernel launches
// @param mr             Device memory resource used to allocate the returned table's device memory
// @return New table of strings columns
std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
                              string_scalar const& delimiter,
                              size_type maxsplit,
                              rmm::cuda_stream_view stream,
                              rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  // Use the stream passed by the caller; substituting cudf::get_default_stream()
  // here would silently ignore the `stream` argument.
  return detail::rsplit(strings_column, delimiter, maxsplit, stream, mr);
}

} // namespace strings
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/strings/split/split_record.cu
Original file line number Diff line number Diff line change
Expand Up @@ -203,21 +203,22 @@ std::unique_ptr<column> split_record(strings_column_view const& strings,
// Public entry point for cudf::strings::split_record.
//
// Thin wrapper that forwards every argument, including the caller-supplied
// `stream`, to detail::split_record instantiated with Direction::FORWARD.
//
// @param strings   A column of string elements to be split
// @param delimiter The string to identify split points in each string
// @param maxsplit  Maximum number of splits to perform
// @param stream    CUDA stream used for device memory operations and kernel launches
// @param mr        Device memory resource used to allocate the returned result's device memory
// @return Lists column of strings; each row holds the splits from a single
//         row element of the input column
std::unique_ptr<column> split_record(strings_column_view const& strings,
                                     string_scalar const& delimiter,
                                     size_type maxsplit,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  // Use the stream passed by the caller; substituting cudf::get_default_stream()
  // here would silently ignore the `stream` argument.
  return detail::split_record<detail::Direction::FORWARD>(strings, delimiter, maxsplit, stream, mr);
}

// Public entry point for cudf::strings::rsplit_record.
//
// Thin wrapper that forwards every argument, including the caller-supplied
// `stream`, to detail::split_record instantiated with Direction::BACKWARD.
//
// @param strings   A column of string elements to be split
// @param delimiter The string to identify split points in each string
// @param maxsplit  Maximum number of splits to perform
// @param stream    CUDA stream used for device memory operations and kernel launches
// @param mr        Device memory resource used to allocate the returned result's device memory
// @return Lists column of strings; each row holds the splits from a single
//         row element of the input column
std::unique_ptr<column> rsplit_record(strings_column_view const& strings,
                                      string_scalar const& delimiter,
                                      size_type maxsplit,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  // Use the stream passed by the caller; substituting cudf::get_default_stream()
  // here would silently ignore the `stream` argument.
  return detail::split_record<detail::Direction::BACKWARD>(
    strings, delimiter, maxsplit, stream, mr);
}

} // namespace strings
Expand Down