Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove unneeded calls to create_chars_child_column utility #14997

Merged
merged 14 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -222,19 +222,19 @@ CUDF_KERNEL void gather_chars_fn_char_parallel(StringIterator strings_begin,
* @return New chars column fit for a strings column.
*/
template <typename StringIterator, typename MapIterator>
std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
MapIterator map_begin,
MapIterator map_end,
cudf::detail::input_offsetalator const offsets,
size_type chars_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
rmm::device_uvector<char> gather_chars(StringIterator strings_begin,
MapIterator map_begin,
MapIterator map_end,
cudf::detail::input_offsetalator const offsets,
size_type chars_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const output_count = std::distance(map_begin, map_end);
if (output_count == 0) return make_empty_column(type_id::INT8);
if (output_count == 0) return rmm::device_uvector<char>(0, stream, mr);

auto chars_column = create_chars_child_column(chars_bytes, stream, mr);
auto const d_chars = chars_column->mutable_view().template data<char>();
auto chars_data = rmm::device_uvector<char>(chars_bytes, stream, mr);
auto d_chars = chars_data.data();

constexpr int warps_per_threadblock = 4;
// String parallel strategy will be used if average string length is above this threshold.
Expand All @@ -260,7 +260,7 @@ std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
stream.value()>>>(strings_begin, d_chars, offsets, map_begin, output_count);
}

return chars_column;
return chars_data;
}

/**
Expand Down Expand Up @@ -316,12 +316,12 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
// build chars column
auto const offsets_view =
cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view());
auto out_chars_column = gather_chars(
auto out_chars_data = gather_chars(
d_strings->begin<string_view>(), begin, end, offsets_view, total_bytes, stream, mr);

return make_strings_column(output_count,
std::move(out_offsets_column),
std::move(out_chars_column->release().data.release()[0]),
out_chars_data.release(),
0, // caller sets these
rmm::device_buffer{});
}
Expand Down
72 changes: 35 additions & 37 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,46 +98,44 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
(null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr};

// build chars column
std::unique_ptr<column> chars_column =
[offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] {
auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
// use a character-parallel kernel for long string lengths
if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
auto const d_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(offsets_view);
auto const str_begin = thrust::make_transform_iterator(
begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
return string_view{ip.first, ip.second};
}));

return gather_chars(str_begin,
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(strings_count),
d_offsets,
bytes,
stream,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
};
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_zip_iterator(
thrust::make_tuple(begin, offsets_view.template begin<int32_t>())),
strings_count,
copy_chars);
return chars_column;
}
}();
auto chars_data = [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] {
auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
// use a character-parallel kernel for long string lengths
if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_view);
auto const str_begin = thrust::make_transform_iterator(
begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
return string_view{ip.first, ip.second};
}));

return gather_chars(str_begin,
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(strings_count),
d_offsets,
bytes,
stream,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_data = rmm::device_uvector<char>(bytes, stream, mr);
auto d_chars = chars_data.data();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
};
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_zip_iterator(
thrust::make_tuple(begin, offsets_view.template begin<int32_t>())),
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
strings_count,
copy_chars);
return chars_data;
}
}();

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
chars_data.release(),
null_count,
std::move(null_mask));
}
Expand Down
24 changes: 12 additions & 12 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/strings/detail/convert/int_to_string.cuh>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
Expand Down Expand Up @@ -88,12 +89,12 @@ struct duration_to_string_size_fn {

template <typename T>
struct duration_to_string_fn : public duration_to_string_size_fn<T> {
int32_t const* d_offsets;
cudf::detail::input_offsetalator d_offsets;
char* d_chars;
using duration_to_string_size_fn<T>::d_durations;

duration_to_string_fn(column_device_view const d_durations,
int32_t const* d_offsets,
cudf::detail::input_offsetalator d_offsets,
char* d_chars)
: duration_to_string_size_fn<T>{d_durations}, d_offsets(d_offsets), d_chars(d_chars)
{
Expand Down Expand Up @@ -181,28 +182,27 @@ struct dispatch_from_durations_fn {

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(durations, stream, mr);

// build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0), duration_to_string_size_fn<T>{d_column});
auto [offsets_column, chars_bytes] = cudf::detail::make_offsets_child_column(
auto offsets_transformer_itr =
cudf::detail::make_counting_transform_iterator(0, duration_to_string_size_fn<T>{d_column});
auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_new_offsets = offsets_view.template data<int32_t>();
auto d_new_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view());

// build chars column
auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
auto chars_data = rmm::device_uvector<char>(chars_bytes, stream, mr);
auto d_chars = chars_data.data();

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
duration_to_string_fn<T>{d_column, d_new_offsets, d_chars});

//
return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
chars_data.release(),
durations.null_count(),
std::move(null_mask));
}
Expand Down
Loading