From 7d5561a8c0aeb8531913d7767faca55a5ab31fa5 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:29:39 -0400 Subject: [PATCH] Fix debug assert in rowgroup_char_counts_kernel (#15902) Fixes assert triggered by `OrcWriterTest.EmptyChildStringColumn` in a Debug build. ``` $ gtests/ORC_TEST --gtest_filter=OrcWriterTest.EmptyChildStringColumn Note: Google Test filter = OrcWriterTest.EmptyChildStringColumn [==========] Running 1 test from 1 test suite. [----------] Global test environment set-up. [----------] 1 test from OrcWriterTest [ RUN ] OrcWriterTest.EmptyChildStringColumn /cudf/cpp/include/cudf/detail/offsets_iterator.cuh:79: cudf::detail::input_offsetalator::input_offsetalator(const void *, cudf::data_type, int): block: [0,0,0], thread: [0,0,0] Assertion `(dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64) && "Unexpected offsets type"` failed. CUDA Error detected. cudaErrorAssert device-side assert triggered ORC_TEST: /conda/envs/rapids/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp:248: void rmm::mr::detail::stream_ordered_memory_resource<PoolResource, FreeListType>::do_deallocate(void*, std::size_t, rmm::cuda_stream_view) [with PoolResource = rmm::mr::pool_memory_resource; FreeListType = rmm::mr::detail::coalescing_free_list; std::size_t = long unsigned int]: Assertion `status__ == cudaSuccess' failed. Aborted (core dumped) ``` Error introduced in #15891 where offsetalator wraps an offsets column in the `cudf::io::orc::gpu::rowgroup_char_counts_kernel`. But when `num_rows==0` the offsets column is `EMPTY` causing the assert to trigger. Checking the `num_rows` before accessing the offsets column fixes the issue. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/15902 --- cpp/src/io/orc/dict_enc.cu | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 5181c4a1c0e..5be75350951 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -44,12 +44,13 @@ CUDF_KERNEL void rowgroup_char_counts_kernel(device_2dspan<size_type> char_count auto const start_row = rowgroup_bounds[row_group_idx][col_idx].begin + str_col.offset(); auto const num_rows = rowgroup_bounds[row_group_idx][col_idx].size(); - auto const& offsets = str_col.child(strings_column_view::offsets_column_index); - auto const offsets_itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); - char_counts[str_col_idx][row_group_idx] = - (num_rows == 0) - ? 0 - : static_cast<size_type>(offsets_itr[start_row + num_rows] - offsets_itr[start_row]); + size_type char_count = 0; + if (num_rows > 0) { + auto const& offsets = str_col.child(strings_column_view::offsets_column_index); + auto const offsets_itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); + char_count = static_cast<size_type>(offsets_itr[start_row + num_rows] - offsets_itr[start_row]); + } + char_counts[str_col_idx][row_group_idx] = char_count; } void rowgroup_char_counts(device_2dspan<size_type> counts,