Skip to content

Commit

Permalink
Update interleave lists column for large strings (#15877)
Browse files Browse the repository at this point in the history
Fixes the `compute_string_sizes_and_interleave_lists_fn` functor to access string row contents through the `column_device_view::element<string_view>()` method instead of reading the strings offsets directly. This removes the need to add offsetalator-specific handling to the functor.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Bradley Dice (https://github.com/bdice)

URL: #15877
  • Loading branch information
davidwendt authored May 30, 2024
1 parent 3a75f6d commit 5ce95f0
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions cpp/src/lists/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ struct compute_string_sizes_and_interleave_lists_fn {
lists_col.child(lists_column_view::offsets_column_index).template data<size_type>() +
lists_col.offset();
auto const& str_col = lists_col.child(lists_column_view::child_column_index);
auto const str_offsets =
str_col.child(strings_column_view::offsets_column_index).template data<size_type>();

// The range of indices of the strings within the source list.
auto const start_str_idx = list_offsets[list_id];
Expand All @@ -181,13 +179,13 @@ struct compute_string_sizes_and_interleave_lists_fn {
size_type write_idx = dst_list_offsets[idx];

for (auto read_idx = start_str_idx; read_idx < end_str_idx; ++read_idx, ++write_idx) {
auto const offset = str_offsets[read_idx];
auto const size = str_offsets[read_idx + 1] - offset;
string_index_pair result = {nullptr, size};
if (str_col.is_valid(read_idx)) {
result.first = size > 0 ? str_col.template head<char>() + offset : "";
if (str_col.is_null(read_idx)) {
indices[write_idx] = string_index_pair{nullptr, 0};
continue;
}
indices[write_idx] = result;
auto const d_str = str_col.element<string_view>(read_idx);
indices[write_idx] = d_str.empty() ? string_index_pair{"", 0}
: string_index_pair{d_str.data(), d_str.size_bytes()};
}
}
};
Expand Down

0 comments on commit 5ce95f0

Please sign in to comment.