From 77baf0990d6e453c4564662c1d81deaa4e139809 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 13 Aug 2021 18:01:35 -0400 Subject: [PATCH] Fix memory read error in get_dremel_data in page_enc.cu (#8995) Reference issue #8883 and depends on PR #8884. The `PARQUET_TEST` fails with cuda-memcheck when called with the `rmm_mode=cuda` parameter. The 4-byte read error is caused by this code logic: ``` // Scan to get distance by which each offset value is shifted due to the insertion of empties auto scan_it = cudf::detail::make_counting_transform_iterator( column_offsets[level], [off = lcv.offsets().data()] __device__(auto i) -> int { return off[i] == off[i + 1]; }); rmm::device_uvector scan_out(offset_size_at_level, stream); thrust::exclusive_scan(rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); ``` The `scan_it` lambda will read one offset value past the end due to the `off[i + 1]` statement. The `exclusive_scan` does not actually use the last element, so the code was modified to just return `false` when `i + 1` is greater than or equal to the size of the offsets column, i.e. whenever reading `off[i + 1]` would be out of bounds. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Devavret Makkar (https://github.com/devavret) URL: https://github.com/rapidsai/cudf/pull/8995 --- cpp/src/io/parquet/page_enc.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 3c62dcf7eea..20a7ab7ca6d 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1966,9 +1966,9 @@ dremel_data get_dremel_data(column_view h_col, // Scan to get distance by which each offset value is shifted due to the insertion of empties auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], [off = lcv.offsets().data()] __device__(auto i) -> int { - return off[i] == off[i + 1]; - }); + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); rmm::device_uvector scan_out(offset_size_at_level, stream); thrust::exclusive_scan( rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); @@ -2053,9 +2053,9 @@ dremel_data get_dremel_data(column_view h_col, // Scan to get distance by which each offset value is shifted due to the insertion of dremel // level value fof an empty list auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], [off = lcv.offsets().data()] __device__(auto i) -> int { - return off[i] == off[i + 1]; - }); + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); rmm::device_uvector scan_out(offset_size_at_level, stream); thrust::exclusive_scan( rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin());