From 5ce95f05eeae469f4d46516b3cf6fe19902623f6 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 30 May 2024 09:24:58 -0400 Subject: [PATCH] Update interleave lists column for large strings (#15877) Fixes the `compute_string_sizes_and_interleave_lists_fn` functor to use the `column_device_view::element()` method to access string row contents instead of using the strings offsets. This removes the need to add specific offsetalator logic to the functor. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15877 --- cpp/src/lists/interleave_columns.cu | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index be8fad62412..45ae3671d4e 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -166,8 +166,6 @@ struct compute_string_sizes_and_interleave_lists_fn { lists_col.child(lists_column_view::offsets_column_index).template data() + lists_col.offset(); auto const& str_col = lists_col.child(lists_column_view::child_column_index); - auto const str_offsets = - str_col.child(strings_column_view::offsets_column_index).template data(); // The range of indices of the strings within the source list. auto const start_str_idx = list_offsets[list_id]; @@ -181,13 +179,13 @@ struct compute_string_sizes_and_interleave_lists_fn { size_type write_idx = dst_list_offsets[idx]; for (auto read_idx = start_str_idx; read_idx < end_str_idx; ++read_idx, ++write_idx) { - auto const offset = str_offsets[read_idx]; - auto const size = str_offsets[read_idx + 1] - offset; - string_index_pair result = {nullptr, size}; - if (str_col.is_valid(read_idx)) { - result.first = size > 0 ? 
str_col.template head() + offset : ""; + if (str_col.is_null(read_idx)) { + indices[write_idx] = string_index_pair{nullptr, 0}; + continue; } - indices[write_idx] = result; + auto const d_str = str_col.element(read_idx); + indices[write_idx] = d_str.empty() ? string_index_pair{"", 0} + : string_index_pair{d_str.data(), d_str.size_bytes()}; } } };