Skip to content

Commit

Permalink
Use offsetalator in orc rowgroup_char_counts_kernel (#15891)
Browse files Browse the repository at this point in the history
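Replaces the hardcoded `size_type` element access on the strings offsets column with the offsetalator when computing the number of characters in each row group in `cudf::io::orc::gpu::rowgroup_char_counts_kernel`. The offsetalator is constructed from the offsets column's data pointer and its type (`offsets.type()`), and the resulting difference is cast back to `size_type`.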

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #15891
  • Loading branch information
davidwendt authored May 31, 2024
1 parent e7be142 commit 7949a9c
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions cpp/src/io/orc/dict_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "orc_gpu.hpp"

#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/orc_types.hpp>
#include <cudf/table/experimental/row_operators.cuh>
Expand Down Expand Up @@ -43,11 +44,12 @@ CUDF_KERNEL void rowgroup_char_counts_kernel(device_2dspan<size_type> char_count
auto const start_row = rowgroup_bounds[row_group_idx][col_idx].begin + str_col.offset();
auto const num_rows = rowgroup_bounds[row_group_idx][col_idx].size();

auto const& offsets = str_col.child(strings_column_view::offsets_column_index);
auto const& offsets = str_col.child(strings_column_view::offsets_column_index);
auto const offsets_itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
char_counts[str_col_idx][row_group_idx] =
(num_rows == 0)
? 0
: offsets.element<size_type>(start_row + num_rows) - offsets.element<size_type>(start_row);
: static_cast<size_type>(offsets_itr[start_row + num_rows] - offsets_itr[start_row]);
}

void rowgroup_char_counts(device_2dspan<size_type> counts,
Expand Down

0 comments on commit 7949a9c

Please sign in to comment.