Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose stream parameter in public strings combine APIs #14281

Merged
merged 4 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 48 additions & 40 deletions cpp/include/cudf/strings/combine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,20 @@ enum class output_if_empty_list {
*
* @throw cudf::logic_error if separator is not valid.
*
* @param strings Strings for this operation.
* @param input Strings for this operation
* @param separator String that should be inserted between each string.
* Default is an empty string.
* @param narep String that should represent any null strings found.
* @param narep String to replace any null strings found.
* Default of invalid-scalar will ignore any null entries.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column containing one string.
*/
std::unique_ptr<column> join_strings(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -127,25 +129,25 @@ std::unique_ptr<column> join_strings(
* @throw cudf::logic_error if the number of rows from @p separators and @p strings_columns
* do not match
*
* @param strings_columns List of strings columns to concatenate.
* @param strings_columns List of strings columns to concatenate
* @param separators Strings column that provides the separator for a given row
* @param separator_narep String that should be used in place of a null separator for a given
* row. Default of invalid-scalar means no row separator value replacements.
* Default is an invalid string.
* @param col_narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means no null column value replacements.
* Default is an invalid string.
* @param separator_narep String to replace a null separator for a given row.
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
* Default of invalid-scalar means no row separator value replacements.
* @param col_narep String that should be used in place of any null strings found in any column.
* Default of invalid-scalar means no null column value replacements.
* @param separate_nulls If YES, then the separator is included for null rows
* if `col_narep` is valid.
* @param mr Resource for allocating device memory.
* @return New column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Resource for allocating device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
strings_column_view const& separators,
string_scalar const& separator_narep = string_scalar("", false),
string_scalar const& col_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -184,21 +186,23 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if separator is not valid.
* @throw cudf::logic_error if only one column is specified
*
* @param strings_columns List of string columns to concatenate.
* @param strings_columns List of string columns to concatenate
* @param separator String that should be inserted between each string from each row.
* Default is an empty string.
* @param narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means any null entry in any column will
* @param narep String to replace any null strings found in any column.
* Default of invalid-scalar means any null entry in any column will
* produce a null result for that row.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with concatenated results.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -243,19 +247,20 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do
* not match
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separators Strings column that provides separators for concatenation.
* @param separator_narep String that should be used to replace null separator, default is an
* invalid-scalar denoting that rows containing null separator will result in null string in
* the corresponding output rows.
* @param string_narep String that should be used to replace null strings in any non-null list row,
* default is an invalid-scalar denoting that list rows containing null strings will result
* in null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separators Strings column that provides separators for concatenation
* @param separator_narep String that should be used to replace a null separator.
* Default is an invalid-scalar denoting that rows containing null separator will result in
* a null string in the corresponding output rows.
* @param string_narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows
* if `string_narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will
* result in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
Expand All @@ -264,6 +269,7 @@ std::unique_ptr<column> join_list_elements(
string_scalar const& string_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -303,24 +309,26 @@ std::unique_ptr<column> join_list_elements(
* @throw cudf::logic_error if input column is not lists of strings column.
* @throw cudf::logic_error if separator is not valid.
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separator String that should inserted between strings of each list row, default is an
* empty string.
* @param narep String that should be used to replace null strings in any non-null list row, default
* is an invalid-scalar denoting that list rows containing null strings will result in null
* string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separator String to insert between strings of each list row.
* Default is an empty string.
* @param narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will result
* in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
8 changes: 7 additions & 1 deletion cpp/include/cudf/strings/repeat_strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ namespace strings {
*
* @param input The scalar containing the string to repeat
* @param repeat_times The number of times the input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned string scalar
* @return New string scalar in which the input string is repeated
*/
std::unique_ptr<string_scalar> repeat_string(
string_scalar const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -81,12 +83,14 @@ std::unique_ptr<string_scalar> repeat_string(
*
* @param input The column containing strings to repeat
* @param repeat_times The number of times each input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -115,13 +119,15 @@ std::unique_ptr<column> repeat_strings(
*
* @param input The column containing strings to repeat
* @param repeat_times The column containing numbers of times that the corresponding input strings
* are repeated
* for each row are repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings.
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
column_view const& repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
14 changes: 5 additions & 9 deletions cpp/src/strings/combine/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -267,28 +267,24 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
string_scalar const& separator,
string_scalar const& narep,
separator_on_nulls separate_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::concatenate(
strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr);
return detail::concatenate(strings_columns, separator, narep, separate_nulls, stream, mr);
}

std::unique_ptr<column> concatenate(table_view const& strings_columns,
strings_column_view const& separators,
string_scalar const& separator_narep,
string_scalar const& col_narep,
separator_on_nulls separate_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::concatenate(strings_columns,
separators,
separator_narep,
col_narep,
separate_nulls,
cudf::get_default_stream(),
mr);
return detail::concatenate(
strings_columns, separators, separator_narep, col_narep, separate_nulls, stream, mr);
}

} // namespace strings
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,11 @@ std::unique_ptr<column> join_strings(strings_column_view const& input,
std::unique_ptr<column> join_strings(strings_column_view const& strings,
string_scalar const& separator,
string_scalar const& narep,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr);
return detail::join_strings(strings, separator, narep, stream, mr);
}

} // namespace strings
Expand Down
13 changes: 5 additions & 8 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -301,16 +301,12 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_scalar const& narep,
separator_on_nulls separate_nulls,
output_if_empty_list empty_list_policy,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::join_list_elements(lists_strings_column,
separator,
narep,
separate_nulls,
empty_list_policy,
cudf::get_default_stream(),
mr);
return detail::join_list_elements(
lists_strings_column, separator, narep, separate_nulls, empty_list_policy, stream, mr);
}

std::unique_ptr<column> join_list_elements(lists_column_view const& lists_strings_column,
Expand All @@ -319,6 +315,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_scalar const& string_narep,
separator_on_nulls separate_nulls,
output_if_empty_list empty_list_policy,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand All @@ -328,7 +325,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep,
separate_nulls,
empty_list_policy,
cudf::get_default_stream(),
stream,
mr);
}

Expand Down
11 changes: 7 additions & 4 deletions cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
return in_ptr[idx % str_size];
});

return std::make_unique<string_scalar>(std::move(buff));
return std::make_unique<string_scalar>(std::move(buff), true, stream, mr);
}

namespace {
Expand Down Expand Up @@ -260,26 +260,29 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,

std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
size_type repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_string(input, repeat_times, stream, mr);
}

std::unique_ptr<column> repeat_strings(strings_column_view const& input,
size_type repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_strings(input, repeat_times, stream, mr);
}

std::unique_ptr<column> repeat_strings(strings_column_view const& input,
column_view const& repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_strings(input, repeat_times, stream, mr);
}

} // namespace strings
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes
ConfigureTest(
STREAM_STRINGS_TEST
streams/strings/case_test.cpp
streams/strings/combine_test.cpp
streams/strings/convert_test.cpp
streams/strings/find_test.cpp
streams/strings/replace_test.cpp
Expand Down
Loading