Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix invalid memory access in Parquet reader #14637

Merged
merged 14 commits into from
Dec 19, 2023
Merged
4 changes: 3 additions & 1 deletion cpp/src/io/parquet/decode_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,9 @@ __global__ void __launch_bounds__(preprocess_block_size)
{rep_runs}};

// setup page info
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, false)) { return; }
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, false, false)) {
return;
}

// initialize the stream decoders (requires values computed in setupLocalPageInfo)
// the size of the rolling batch buffer
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ __global__ void __launch_bounds__(decode_block_size)
min_row,
num_rows,
mask_filter{decode_kernel_mask::GENERAL},
true)) {
true,
false)) {
return;
}

Expand Down
8 changes: 5 additions & 3 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,7 @@ struct mask_filter {
* @param[in] num_rows Maximum number of rows to read
* @param[in] filter Filtering function used to decide which pages to operate on
* @param[in] is_decode_step If we are setting up for the decode step (instead of the preprocess)
* @param[in] is_bounds_step If we are in the string bounds checking step
* @tparam Filter Function that takes a PageInfo reference and returns true if the given page should
* be operated on Currently only used by gpuComputePageSizes step)
* @return True if this page should be processed further
Expand All @@ -1035,7 +1036,8 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s,
size_t min_row,
size_t num_rows,
Filter filter,
bool is_decode_step)
bool is_decode_step,
bool is_bounds_step)
{
int t = threadIdx.x;

Expand Down Expand Up @@ -1126,7 +1128,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s,
//
// NOTE: this check needs to be done after the null counts have been zeroed out
bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0;
if (is_decode_step && s->num_rows == 0 &&
if ((is_decode_step || is_bounds_step) && s->num_rows == 0 &&
!(has_repetition && (is_bounds_page(s, min_row, num_rows, has_repetition) ||
is_page_contained(s, min_row, num_rows)))) {
return false;
Expand Down Expand Up @@ -1387,7 +1389,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s,

// if we're in the decoding step, jump directly to the first
// value we care about
Comment on lines 1395 to 1396
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment needs to be updated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could change this to move the is_bonds_step test inside the else block...I think all that's necessary is to just not zero out those values.

if (is_decode_step) {
if (is_decode_step || is_bounds_step) {
s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0;
} else {
s->input_value_count = 0;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/page_delta_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ __global__ void __launch_bounds__(96)

auto const mask = decode_kernel_mask::DELTA_BINARY;
if (!setupLocalPageInfo(
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) {
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) {
return;
}

Expand Down Expand Up @@ -448,7 +448,7 @@ __global__ void __launch_bounds__(decode_block_size)

auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY;
if (!setupLocalPageInfo(
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) {
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) {
return;
}

Expand Down
12 changes: 8 additions & 4 deletions cpp/src/io/parquet/page_string_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,9 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou

// setup page info
auto const mask = BitOr(decode_kernel_mask::STRING, decode_kernel_mask::DELTA_BYTE_ARRAY);
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this PR change is_decode_step value in some of these calls?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The string preprocessing was passing that as true, leading the setup call to believe the output buffers were valid and thus accessing invalid memory. With the new flag true and the old flag false, we get the behavior that was originally desired, but can now skip the bad pointer arithmetic.

if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) {
return;
}

bool const is_bounds_pg = is_bounds_page(s, min_row, num_rows, has_repetition);

Expand Down Expand Up @@ -660,7 +662,9 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS

// setup page info
auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY;
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; }
if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) {
return;
}

auto const start_value = pp->start_val;

Expand Down Expand Up @@ -723,7 +727,7 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz

// setup page info
if (!setupLocalPageInfo(
s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, true)) {
s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, false, true)) {
return;
}

Expand Down Expand Up @@ -818,7 +822,7 @@ __global__ void __launch_bounds__(decode_block_size)

auto const mask = decode_kernel_mask::STRING;
if (!setupLocalPageInfo(
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) {
s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) {
return;
}

Expand Down
Loading