Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experimental make_strings_children utility #15363

Merged
merged 39 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4a4fc68
Add experimental make-strings-children utility
davidwendt Mar 21, 2024
a02d47d
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Mar 21, 2024
4847d26
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Mar 26, 2024
f8dc360
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 1, 2024
80dd5b8
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 3, 2024
961bd8c
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 4, 2024
d33a99e
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 8, 2024
a835164
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 9, 2024
2238859
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 10, 2024
7dc2f24
fix doxygen and other comments
davidwendt Apr 10, 2024
55eed68
change rows to threads in doxygen
davidwendt Apr 10, 2024
2891538
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 10, 2024
7b1d10d
Merge branch 'exp-make-strings-children' of github.com:davidwendt/cud…
davidwendt Apr 10, 2024
85bdb36
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 10, 2024
7454786
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 10, 2024
f5fa8a9
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 11, 2024
e17f8ae
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 12, 2024
d3d297d
add example to exp doxygen
davidwendt Apr 12, 2024
abbfe6c
Merge branch 'exp-make-strings-children' of github.com:davidwendt/cud…
davidwendt Apr 12, 2024
597a77d
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 12, 2024
7f5010e
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 15, 2024
06e86ed
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 16, 2024
f65b5ec
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 17, 2024
b48a248
fix mr parameter type for new exp utility
davidwendt Apr 17, 2024
5333822
Merge branch 'exp-make-strings-children' of github.com:davidwendt/cud…
davidwendt Apr 18, 2024
90b18f8
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 18, 2024
85745cc
fix includes for strings_children_ex.cuh
davidwendt Apr 18, 2024
f363b96
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 18, 2024
06069e9
change ternary to std::max()
davidwendt Apr 18, 2024
0884fd2
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 18, 2024
b2a780b
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 19, 2024
17228b8
rework functor calling interface
davidwendt Apr 19, 2024
514acdd
Merge branch 'exp-make-strings-children' of github.com:davidwendt/cud…
davidwendt Apr 19, 2024
9d6abfb
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 19, 2024
c852950
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 19, 2024
c0a80c8
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 22, 2024
b066917
undo member to parameter change
davidwendt Apr 22, 2024
7246c46
Merge branch 'branch-24.06' into exp-make-strings-children
davidwendt Apr 22, 2024
a285ebd
fix kernel decl and doxygen
davidwendt Apr 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions cpp/include/cudf/strings/detail/strings_children_ex.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/strings/detail/strings_children.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

namespace cudf {
namespace strings {
namespace detail {
namespace experimental {

/**
* @brief Kernel used by make_strings_children for calling the given functor
*
* @tparam SizeAndExecuteFunction Functor type to call in each thread
*
* @param fn Functor to call in each thread
* @param exec_size Total number of threads to be processed by this kernel
*/
template <typename SizeAndExecuteFunction>
CUDF_KERNEL void strings_children_kernel(SizeAndExecuteFunction fn, size_type exec_size)
{
auto tid = cudf::detail::grid_1d::global_thread_id();
if (tid < exec_size) { fn(tid); }
}

/**
* @brief Creates child offsets and chars data by applying the template function that
* can be used for computing the output size of each string as well as create the output
*
* The `size_and_exec_fn` is expected declare an operator() function with a size_type parameter
* and 3 member variables:
* - `d_sizes`: output size in bytes of each output row for the 1st pass call
* - `d_chars`: output buffer for new string data for the 2nd pass call
* - `d_offsets`: used for addressing the specific output row data in `d_chars`
*
* The 1st pass call computes the output sizes and is identified by `d_chars==nullptr`.
* Null rows should be set with an output size of 0.
*
* @code{.cpp}
* struct size_and_exec_fn {
* size_type* d_sizes;
* char* d_chars;
* input_offsetalator d_offsets;
*
* __device__ void operator()(size_type thread_idx)
* {
* // functor-specific logic to resolve out_idx from thread_idx
* if( !d_chars ) {
* d_sizes[out_idx] = output_size;
* } else {
* auto d_output = d_chars + d_offsets[out_idx];
* // write characters to d_output
* }
* }
* };
* @endcode
*
* @tparam SizeAndExecuteFunction Functor type with an operator() function accepting
* an index parameter and three member variables: `size_type* d_sizes`
* `char* d_chars`, and `input_offsetalator d_offsets`.
*
* @param size_and_exec_fn This is called twice. Once for the output size of each string
* and once again to fill in the memory pointed to by d_chars.
* @param exec_size Number of threads for executing the `size_and_exec_fn` function
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
* @param strings_count Number of strings
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned columns' device memory
* @return Offsets child column and chars vector for creating a strings column
*/
template <typename SizeAndExecuteFunction>
auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
size_type exec_size,
size_type strings_count,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// This is called twice -- once for computing sizes and once for writing chars.
// Reducing the number of places size_and_exec_fn is inlined speeds up compile time.
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) {
auto constexpr block_size = 256;
auto grid = cudf::detail::grid_1d{exec_size, block_size};
strings_children_kernel<<<grid.num_blocks, block_size, 0, stream.value()>>>(size_and_exec_fn,
exec_size);
};

// Compute the output sizes
auto output_sizes = rmm::device_uvector<size_type>(strings_count, stream);
size_and_exec_fn.d_sizes = output_sizes.data();
size_and_exec_fn.d_chars = nullptr;
for_each_fn(size_and_exec_fn);

// Convert the sizes to offsets
auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column(
output_sizes.begin(), output_sizes.end(), stream, mr);
size_and_exec_fn.d_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view());

// Now build the chars column
rmm::device_uvector<char> chars(bytes, stream, mr);
size_and_exec_fn.d_chars = chars.data();

// Execute the function fn again to fill in the chars data.
if (bytes > 0) { for_each_fn(size_and_exec_fn); }

return std::pair(std::move(offsets_column), std::move(chars));
}

/**
* @brief Creates child offsets and chars columns by applying the template function that
* can be used for computing the output size of each string as well as create the output
*
* The `size_and_exec_fn` is expected declare an operator() function with a size_type parameter
* and 3 member variables:
* - `d_sizes`: output size in bytes of each output row for the 1st pass call
* - `d_chars`: output buffer for new string data for the 2nd pass call
* - `d_offsets`: used for addressing the specific output row data in `d_chars`
*
* The 1st pass call computes the output sizes and is identified by `d_chars==nullptr`.
* Null rows should be set with an output size of 0.
*
* @code{.cpp}
* struct size_and_exec_fn {
* size_type* d_sizes;
* char* d_chars;
* input_offsetalator d_offsets;
*
* __device__ void operator()(size_type idx)
* {
* if( !d_chars ) {
* d_sizes[idx] = output_size;
* } else {
* auto d_output = d_chars + d_offsets[idx];
* // write characters to d_output
* }
* }
* };
* @endcode
*
* @tparam SizeAndExecuteFunction Functor type with an operator() function accepting
* an index parameter and three member variables: `size_type* d_sizes`
* `char* d_chars`, and `input_offsetalator d_offsets`.
*
* @param size_and_exec_fn This is called twice. Once for the output size of each string
* and once again to fill in the memory pointed to by `d_chars`.
* @param strings_count Number of strings
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned columns' device memory
* @return Offsets child column and chars vector for creating a strings column
*/
template <typename SizeAndExecuteFunction>
auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
size_type strings_count,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
}

} // namespace experimental
} // namespace detail
} // namespace strings
} // namespace cudf
21 changes: 12 additions & 9 deletions cpp/src/strings/combine/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/combine.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -49,8 +50,9 @@ struct concat_strings_base {
table_device_view const d_table;
string_scalar_device_view const d_narep;
separator_on_nulls separate_nulls;
size_type* d_offsets{};
char* d_chars{};
size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Concatenate each table row to a single output string.
Expand All @@ -68,7 +70,7 @@ struct concat_strings_base {
thrust::any_of(thrust::seq, d_table.begin(), d_table.end(), [idx](auto const& col) {
return col.is_null(idx);
})) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}

Expand All @@ -95,7 +97,7 @@ struct concat_strings_base {
write_separator || (separate_nulls == separator_on_nulls::YES) || !null_element;
}

if (!d_chars) d_offsets[idx] = bytes;
if (!d_chars) { d_sizes[idx] = bytes; }
}
};

Expand All @@ -113,7 +115,7 @@ struct concat_strings_fn : concat_strings_base {
{
}

__device__ void operator()(size_type idx) { process_row(idx, d_separator); }
__device__ void operator()(std::size_t idx) { process_row(idx, d_separator); }
};

} // namespace
Expand Down Expand Up @@ -143,7 +145,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
// Create device views from the strings columns.
auto d_table = table_device_view::create(strings_columns, stream);
concat_strings_fn fn{*d_table, d_separator, d_narep, separate_nulls};
auto [offsets_column, chars] = make_strings_children(fn, strings_count, stream, mr);
auto [offsets_column, chars] = experimental::make_strings_children(fn, strings_count, stream, mr);

// create resulting null mask
auto [null_mask, null_count] = cudf::detail::valid_if(
Expand Down Expand Up @@ -188,7 +190,7 @@ struct multi_separator_concat_fn : concat_strings_base {
__device__ void operator()(size_type idx)
{
if (d_separators.is_null(idx) && !d_separator_narep.is_valid()) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}

Expand Down Expand Up @@ -235,7 +237,8 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,

multi_separator_concat_fn mscf{
*d_table, separator_col_view, separator_rep, col_rep, separate_nulls};
auto [offsets_column, chars] = make_strings_children(mscf, strings_count, stream, mr);
auto [offsets_column, chars] =
experimental::make_strings_children(mscf, strings_count, stream, mr);

// Create resulting null mask
auto [null_mask, null_count] = cudf::detail::valid_if(
Expand Down
58 changes: 34 additions & 24 deletions cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#include <cudf/detail/indexalator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -107,22 +108,26 @@ struct compute_size_and_repeat_fn {
column_device_view const strings_dv;
size_type const repeat_times;
bool const has_nulls;

size_type* d_offsets{nullptr};

// If d_chars == nullptr: only compute sizes of the output strings.
// If d_chars != nullptr: only repeat strings.
char* d_chars{nullptr};

// `idx` will be in the range of [0, repeat_times * strings_count).
__device__ void operator()(size_type const idx) const noexcept
size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Called by make_strings_children to build output
*
* @param idx Thread index in the range [0,repeat_times * strings_count)
* @param d_sizes Return output size here in 1st call (d_chars==nullptr)
* @param d_chars Write output here in 2nd call
* @param d_offsets Offsets to address output row within d_chars
*/
__device__ void operator()(size_type idx) const noexcept
{
auto const str_idx = idx / repeat_times; // value cycles in [0, string_count)
auto const repeat_idx = idx % repeat_times; // value cycles in [0, repeat_times)
auto const is_valid = !has_nulls || strings_dv.is_valid_nocheck(str_idx);

if (!d_chars && repeat_idx == 0) {
d_offsets[str_idx] =
d_sizes[str_idx] =
is_valid ? repeat_times * strings_dv.element<string_view>(str_idx).size_bytes() : 0;
}

Expand Down Expand Up @@ -161,8 +166,8 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
auto const strings_dv_ptr = column_device_view::create(input.parent(), stream);
auto const fn = compute_size_and_repeat_fn{*strings_dv_ptr, repeat_times, input.has_nulls()};

auto [offsets_column, chars] =
make_strings_children(fn, strings_count * repeat_times, strings_count, stream, mr);
auto [offsets_column, chars] = experimental::make_strings_children(
fn, strings_count * repeat_times, strings_count, stream, mr);
return make_strings_column(strings_count,
std::move(offsets_column),
chars.release(),
Expand All @@ -182,22 +187,27 @@ struct compute_sizes_and_repeat_fn {
Iterator const repeat_times_iter;
bool const strings_has_nulls;
bool const rtimes_has_nulls;

size_type* d_offsets{nullptr};

// If d_chars == nullptr: only compute sizes of the output strings.
// If d_chars != nullptr: only repeat strings.
char* d_chars{nullptr};

__device__ void operator()(size_type const idx) const noexcept
size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Called by make_strings_children to build output
*
* @param idx Row index
* @param d_sizes Return output size here in 1st call (d_chars==nullptr)
* @param d_chars Write output here in 2nd call
* @param d_offsets Offsets to address output row within d_chars
*/
__device__ void operator()(size_type idx) const noexcept
{
auto const string_is_valid = !strings_has_nulls || strings_dv.is_valid_nocheck(idx);
auto const rtimes_is_valid = !rtimes_has_nulls || repeat_times_dv.is_valid_nocheck(idx);

// Any null input (either string or repeat_times value) will result in a null output.
auto const is_valid = string_is_valid && rtimes_is_valid;
if (!is_valid) {
if (!d_chars) { d_offsets[idx] = 0; }
if (!d_chars) { d_sizes[idx] = 0; }
return;
}

Expand All @@ -206,7 +216,7 @@ struct compute_sizes_and_repeat_fn {

if (!d_chars) {
// repeat_times could be negative
d_offsets[idx] = (repeat_times > 0) ? (repeat_times * d_str.size_bytes()) : 0;
d_sizes[idx] = std::max(repeat_times, 0) * d_str.size_bytes();
} else {
auto output_ptr = d_chars + d_offsets[idx];
while (repeat_times-- > 0) {
Expand Down Expand Up @@ -241,7 +251,7 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
input.has_nulls(),
repeat_times.has_nulls()};

auto [offsets_column, chars] = make_strings_children(fn, strings_count, stream, mr);
auto [offsets_column, chars] = experimental::make_strings_children(fn, strings_count, stream, mr);

// We generate new bitmask by AND of the two input columns' bitmasks.
// Note that if either of the input columns are nullable, the output column will also be nullable
Expand Down
Loading