Skip to content

Commit

Permalink
Rework cudf::strings::from_booleans to use make_strings_children (#12365)
Browse files Browse the repository at this point in the history

Issue #12167
 
Use `make_strings_children` in `cudf::strings::from_booleans` to simplify code and make the function more robust.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Divye Gala (https://github.com/divyegala)

URL: #12365
  • Loading branch information
vuule authored Dec 20, 2022
1 parent 450d684 commit 7b10af9
Showing 1 changed file with 31 additions and 26 deletions.
57 changes: 31 additions & 26 deletions cpp/src/strings/convert/convert_booleans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,32 @@ std::unique_ptr<column> to_booleans(strings_column_view const& strings,
}

namespace detail {

namespace {
/**
 * @brief Device functor that renders each boolean row as one of two strings.
 *
 * An instance is handed to `make_strings_children` with only `d_column`,
 * `d_true` and `d_false` populated; `d_offsets` and `d_chars` default to null.
 * NOTE(review): presumably `make_strings_children` assigns those two members
 * and invokes the functor once per row in a sizing pass and again in a copy
 * pass -- confirm against that utility's contract.
 *
 * Behavior per row:
 *  - `d_chars == nullptr`: store the output byte count in `d_offsets[idx]`
 *    (0 for a null row).
 *  - `d_chars != nullptr`: copy the selected string's bytes to
 *    `d_chars + d_offsets[idx]`; null rows write nothing.
 */
struct from_booleans_fn {
  column_device_view const d_column;  ///< Boolean input rows
  string_view d_true;                 ///< Output text for a `true` row
  string_view d_false;                ///< Output text for a `false` row
  offset_type* d_offsets{};           ///< Per-row sizes (written) or offsets (read)
  char* d_chars{};                    ///< Output character buffer; null selects sizing mode

  /**
   * @brief Process the row at `idx` according to the current mode.
   *
   * @param idx Row index into `d_column`
   */
  __device__ void operator()(size_type idx) const
  {
    bool const sizing_pass = (d_chars == nullptr);

    // Null rows occupy zero bytes and never touch the chars buffer.
    if (d_column.is_null(idx)) {
      if (sizing_pass) { d_offsets[idx] = 0; }
      return;
    }

    // Pick the replacement text once; both modes need it.
    auto const chosen = d_column.element<bool>(idx) ? d_true : d_false;

    if (sizing_pass) {
      d_offsets[idx] = chosen.size_bytes();
    } else {
      memcpy(d_chars + d_offsets[idx], chosen.data(), chosen.size_bytes());
    }
  }
};
} // namespace

// Convert boolean column to strings column
std::unique_ptr<column> from_booleans(column_view const& booleans,
string_scalar const& true_string,
Expand All @@ -113,34 +139,13 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(booleans, stream, mr);
// build offsets column
auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(
0, [d_column, d_true, d_false] __device__(size_type idx) {
if (d_column.is_null(idx)) return 0;
return d_column.element<bool>(idx) ? d_true.size_bytes() : d_false.size_bytes();
});
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_offsets = offsets_view.data<int32_t>();

// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
[d_column, d_true, d_false, d_offsets, d_chars] __device__(size_type idx) {
if (d_column.is_null(idx)) return;
string_view result = (d_column.element<bool>(idx) ? d_true : d_false);
memcpy(d_chars + d_offsets[idx], result.data(), result.size_bytes());
});

auto children =
make_strings_children(from_booleans_fn{d_column, d_true, d_false}, strings_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
booleans.null_count(),
std::move(null_mask));
}
Expand Down

0 comments on commit 7b10af9

Please sign in to comment.