-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix invalid memory access in Parquet reader #14637
Changes from all commits
7b79bf9
58ab859
ca5651f
0a010b8
86e7444
64668df
10177b7
7a10e3a
a0fff21
31163c3
a30d5aa
6656dc0
1e01dd5
80d0c66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -616,7 +616,15 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou | |
|
||
// setup page info | ||
auto const mask = BitOr(decode_kernel_mask::STRING, decode_kernel_mask::DELTA_BYTE_ARRAY); | ||
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does this PR change […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The string preprocessing was passing that as true, leading the setup call to believe the output buffers were valid and thus accessing invalid memory. With the new flag true and the old flag false, we get the behavior that was originally desired, but can now skip the bad pointer arithmetic. |
||
if (!setupLocalPageInfo(s, | ||
pp, | ||
chunks, | ||
min_row, | ||
num_rows, | ||
mask_filter{mask}, | ||
page_processing_stage::STRING_BOUNDS)) { | ||
return; | ||
} | ||
|
||
bool const is_bounds_pg = is_bounds_page(s, min_row, num_rows, has_repetition); | ||
|
||
|
@@ -659,8 +667,15 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS | |
bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; | ||
|
||
// setup page info | ||
auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; | ||
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; } | ||
if (!setupLocalPageInfo(s, | ||
pp, | ||
chunks, | ||
min_row, | ||
num_rows, | ||
mask_filter{decode_kernel_mask::DELTA_BYTE_ARRAY}, | ||
page_processing_stage::STRING_BOUNDS)) { | ||
return; | ||
} | ||
|
||
auto const start_value = pp->start_val; | ||
|
||
|
@@ -722,8 +737,13 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz | |
bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; | ||
|
||
// setup page info | ||
if (!setupLocalPageInfo( | ||
s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, true)) { | ||
if (!setupLocalPageInfo(s, | ||
pp, | ||
chunks, | ||
min_row, | ||
num_rows, | ||
mask_filter{decode_kernel_mask::STRING}, | ||
page_processing_stage::STRING_BOUNDS)) { | ||
return; | ||
} | ||
|
||
|
@@ -816,9 +836,13 @@ __global__ void __launch_bounds__(decode_block_size) | |
int const lane_id = t % warp_size; | ||
[[maybe_unused]] null_count_back_copier _{s, t}; | ||
|
||
auto const mask = decode_kernel_mask::STRING; | ||
if (!setupLocalPageInfo( | ||
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) { | ||
if (!setupLocalPageInfo(s, | ||
&pages[page_idx], | ||
chunks, | ||
min_row, | ||
num_rows, | ||
mask_filter{decode_kernel_mask::STRING}, | ||
page_processing_stage::DECODE)) { | ||
return; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this comment needs to be updated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could change this to move the `is_bounds_step` test inside the `else` block... I think all that's necessary is to just not zero out those values.