Skip to content

Commit

Permalink
Fix invalid memory access in Parquet reader (rapidsai#14637)
Browse files Browse the repository at this point in the history
Fixes rapidsai#14633

When a file is read in multiple passes, some pointer fields in `ColumnChunkDesc` that point to transient memory are not cleared at the end of each pass. This can lead to dereferencing deallocated memory during Parquet reader string preprocessing.

Authors:
  - Ed Seidl (https://github.com/etseidl)
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: rapidsai#14637
  • Loading branch information
etseidl authored and abellina committed Jan 16, 2024
1 parent fcb6540 commit d3470e7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
11 changes: 9 additions & 2 deletions cpp/src/io/parquet/decode_fixed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,11 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodePageDataFixed(

// TODO: abellina all_types_filter???
//if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, true)) { return; }
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::FIXED_WIDTH_NO_DICT}, true)) { return; }
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows,
mask_filter{decode_kernel_mask::FIXED_WIDTH_NO_DICT},
page_processing_stage::DECODE)) {
return;
}

// must come after the kernel mask check
[[maybe_unused]] null_count_back_copier _{s, t};
Expand Down Expand Up @@ -420,7 +424,10 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodePageDataFixedDict(
if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT))) { return; }

// TODO: abellina all_types_filter???
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::FIXED_WIDTH_DICT}, true)) { return; }
if (!setupLocalPageInfo(
s, pp, chunks, min_row, num_rows,
mask_filter{decode_kernel_mask::FIXED_WIDTH_DICT},
page_processing_stage::DECODE)) { return; }

#ifdef ABDEBUG
if (t == 0) {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/parquet/page_string_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -616,12 +616,13 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou
decoders[level_type::NUM_LEVEL_TYPES] = {{def_runs}, {rep_runs}};

// setup page info
auto const mask = BitOr(decode_kernel_mask::STRING, decode_kernel_mask::DELTA_BYTE_ARRAY);
if (!setupLocalPageInfo(s,
pp,
chunks,
min_row,
num_rows,
mask_filter{STRINGS_MASK},
mask_filter{mask},
page_processing_stage::STRING_BOUNDS)) {
return;
}
Expand Down

0 comments on commit d3470e7

Please sign in to comment.