From 7b79bf915aaac6a64fafc251434b2d300a6316d9 Mon Sep 17 00:00:00 2001 From: seidl Date: Thu, 14 Dec 2023 15:14:27 -0800 Subject: [PATCH 1/8] clear out pointers to transient memory after each decode pass --- cpp/src/io/parquet/reader_impl.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 6e799424d01..b40bd175f61 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -276,6 +276,14 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) } } + // clear out leftover cruft for next pass + std::for_each(chunks.begin(), chunks.end(), [](auto& chunk) { + chunk.valid_map_base = 0; + chunk.column_data_base = 0; + chunk.column_string_base = 0; + }); + chunks.host_to_device_async(_stream); + _stream.synchronize(); } From 0a010b83ccdf65d066ba3845368e62a57d1149e1 Mon Sep 17 00:00:00 2001 From: seidl Date: Thu, 14 Dec 2023 17:21:32 -0800 Subject: [PATCH 2/8] alternate fix --- cpp/src/io/parquet/page_string_decode.cu | 9 +++++++-- cpp/src/io/parquet/parquet_gpu.hpp | 2 +- cpp/src/io/parquet/reader_impl.cpp | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index e868625afb6..8975ec5f288 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -583,7 +583,7 @@ __device__ thrust::pair totalDeltaByteArraySize(uint8_t const* d */ template __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBounds( - PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) + PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { __shared__ __align__(16) page_state_s state_g; @@ -598,6 +598,11 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou // reset str_bytes to 0 in case it's already been calculated (esp needed for chunked reads). // TODO: need to rethink this once str_bytes is in the statistics pp->str_bytes = 0; + + // clean up potential leftovers from earlier passes + chunks[pp->chunk_idx].column_data_base = nullptr; + chunks[pp->chunk_idx].column_string_base = nullptr; + chunks[pp->chunk_idx].valid_map_base = nullptr; } // whether or not we have repetition levels (lists) @@ -986,7 +991,7 @@ struct page_tform_functor { * @copydoc cudf::io::parquet::detail::ComputePageStringSizes */ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, - cudf::detail::hostdevice_vector const& chunks, + cudf::detail::hostdevice_vector& chunks, rmm::device_uvector& temp_string_buf, size_t min_row, size_t num_rows, diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 7f557d092c5..06c4d9d3178 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -656,7 +656,7 @@ void ComputePageSizes(cudf::detail::hostdevice_vector& pages, * @param[in] stream CUDA stream to use */ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, - cudf::detail::hostdevice_vector const& chunks, + cudf::detail::hostdevice_vector& chunks, rmm::device_uvector& temp_string_buf, size_t min_row, size_t num_rows, diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index b40bd175f61..80b7f88fb5d 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -276,6 +276,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) } } +#if 0 // clear out leftover cruft for next pass std::for_each(chunks.begin(), chunks.end(), [](auto& chunk) { chunk.valid_map_base = 0; @@ -283,6 +284,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) chunk.column_string_base = 0; }); chunks.host_to_device_async(_stream); +#endif _stream.synchronize(); } From 64668df6159f9b0300c59c60a47a3b88462c7040 Mon Sep 17 00:00:00 2001 From: seidl Date: Mon, 18 Dec 2023 08:47:34 -0800 Subject: [PATCH 3/8] yet another fix --- cpp/src/io/parquet/decode_preprocess.cu | 4 +++- cpp/src/io/parquet/page_data.cu | 3 ++- cpp/src/io/parquet/page_decode.cuh | 8 +++++--- cpp/src/io/parquet/page_delta_decode.cu | 4 ++-- cpp/src/io/parquet/page_string_decode.cu | 10 ++++++---- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/parquet/decode_preprocess.cu b/cpp/src/io/parquet/decode_preprocess.cu index d9f91ed564c..ad847b3eba5 100644 --- a/cpp/src/io/parquet/decode_preprocess.cu +++ b/cpp/src/io/parquet/decode_preprocess.cu @@ -232,7 +232,9 @@ __global__ void __launch_bounds__(preprocess_block_size) {rep_runs}}; // setup page info - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, false)) { return; } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, false, false)) { + return; + } // initialize the stream decoders (requires values computed in setupLocalPageInfo) // the size of the rolling batch buffer diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index f7c07aafb70..fdfc92b4126 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -446,7 +446,8 @@ __global__ void __launch_bounds__(decode_block_size) min_row, num_rows, mask_filter{decode_kernel_mask::GENERAL}, - true)) { + true, + false)) { return; } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 5b41ce8fa1f..9ba98cdb473 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1024,6 +1024,7 @@ struct mask_filter { * @param[in] num_rows Maximum number of rows to read * @param[in] filter Filtering function used to decide which pages to operate on * @param[in] is_decode_step If we are setting up for the decode step (instead of the preprocess) + * @param[in] is_bounds_step If we are in the string bounds checking step * @tparam Filter Function that takes a PageInfo reference and returns true if the given page should * be operated on Currently only used by gpuComputePageSizes step) * @return True if this page should be processed further @@ -1035,7 +1036,8 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, size_t min_row, size_t num_rows, Filter filter, - bool is_decode_step) + bool is_decode_step, + bool is_bounds_step) { int t = threadIdx.x; @@ -1126,7 +1128,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // // NOTE: this check needs to be done after the null counts have been zeroed out bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; - if (is_decode_step && s->num_rows == 0 && + if ((is_decode_step or is_bounds_step) && s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows, has_repetition) || is_page_contained(s, min_row, num_rows)))) { return false; @@ -1387,7 +1389,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // if we're in the decoding step, jump directly to the first // value we care about - if (is_decode_step) { + if (is_decode_step or is_bounds_step) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; } else { s->input_value_count = 0; diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 528048d2fe6..74917a0fe56 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -325,7 +325,7 @@ __global__ void __launch_bounds__(96) auto const mask = decode_kernel_mask::DELTA_BINARY; if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) { + s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { return; } @@ -448,7 +448,7 @@ __global__ void __launch_bounds__(decode_block_size) auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) { + s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { return; } diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index 704952520e8..33749a03dcb 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -621,7 +621,9 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou // setup page info auto const mask = BitOr(decode_kernel_mask::STRING, decode_kernel_mask::DELTA_BYTE_ARRAY); - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { + return; + } bool const is_bounds_pg = is_bounds_page(s, min_row, num_rows, has_repetition); @@ -665,7 +667,7 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS // setup page info auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, true)) { return; } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { return; } auto const start_value = pp->start_val; @@ -728,7 +730,7 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz // setup page info if (!setupLocalPageInfo( - s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, true)) { + s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, false, true)) { return; } @@ -823,7 +825,7 @@ __global__ void __launch_bounds__(decode_block_size) auto const mask = decode_kernel_mask::STRING; if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true)) { + s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { return; } From 7a10e3a2977f16460fa228cc100ea04b6ef321d6 Mon Sep 17 00:00:00 2001 From: seidl Date: Mon, 18 Dec 2023 09:18:01 -0800 Subject: [PATCH 4/8] revert earlier fix --- cpp/src/io/parquet/page_string_decode.cu | 13 +++++-------- cpp/src/io/parquet/parquet_gpu.hpp | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index 33749a03dcb..c7e329a47d2 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -584,7 +584,7 @@ __device__ thrust::pair totalDeltaByteArraySize(uint8_t const* d */ template __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBounds( - PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) + PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { __shared__ __align__(16) page_state_s state_g; @@ -599,11 +599,6 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou // reset str_bytes to 0 in case it's already been calculated (esp needed for chunked reads). // TODO: need to rethink this once str_bytes is in the statistics pp->str_bytes = 0; - - // clean up potential leftovers from earlier passes - chunks[pp->chunk_idx].column_data_base = nullptr; - chunks[pp->chunk_idx].column_string_base = nullptr; - chunks[pp->chunk_idx].valid_map_base = nullptr; } // whether or not we have repetition levels (lists) @@ -667,7 +662,9 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS // setup page info auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { return; } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { + return; + } auto const start_value = pp->start_val; @@ -994,7 +991,7 @@ struct page_tform_functor { * @copydoc cudf::io::parquet::detail::ComputePageStringSizes */ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, - cudf::detail::hostdevice_vector& chunks, + cudf::detail::hostdevice_vector const& chunks, rmm::device_uvector& temp_string_buf, size_t min_row, size_t num_rows, diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 06c4d9d3178..7f557d092c5 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -656,7 +656,7 @@ void ComputePageSizes(cudf::detail::hostdevice_vector& pages, * @param[in] stream CUDA stream to use */ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, - cudf::detail::hostdevice_vector& chunks, + cudf::detail::hostdevice_vector const& chunks, rmm::device_uvector& temp_string_buf, size_t min_row, size_t num_rows, From a0fff214754ed52bae19fe43d593c1c97cb95d31 Mon Sep 17 00:00:00 2001 From: seidl Date: Mon, 18 Dec 2023 09:24:55 -0800 Subject: [PATCH 5/8] revert earlier fix --- cpp/src/io/parquet/reader_impl.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 80b7f88fb5d..6e799424d01 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -276,16 +276,6 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) } } -#if 0 - // clear out leftover cruft for next pass - std::for_each(chunks.begin(), chunks.end(), [](auto& chunk) { - chunk.valid_map_base = 0; - chunk.column_data_base = 0; - chunk.column_string_base = 0; - }); - chunks.host_to_device_async(_stream); -#endif - _stream.synchronize(); } From 31163c38a025a937a6a4c308708e4c80053e6ce4 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 18 Dec 2023 13:16:31 -0800 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Nghia Truong <7416935+ttnghia@users.noreply.github.com> --- cpp/src/io/parquet/page_decode.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 9ba98cdb473..ff6a68e26df 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1128,7 +1128,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // // NOTE: this check needs to be done after the null counts have been zeroed out bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; - if ((is_decode_step or is_bounds_step) && s->num_rows == 0 && + if ((is_decode_step || is_bounds_step) && s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows, has_repetition) || is_page_contained(s, min_row, num_rows)))) { return false; @@ -1389,7 +1389,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // if we're in the decoding step, jump directly to the first // value we care about - if (is_decode_step or is_bounds_step) { + if (is_decode_step || is_bounds_step) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; } else { s->input_value_count = 0; From 1e01dd515219f5d64f20cd624963209ac253691c Mon Sep 17 00:00:00 2001 From: seidl Date: Mon, 18 Dec 2023 16:23:48 -0800 Subject: [PATCH 7/8] switch boolean flags to enum --- cpp/src/io/parquet/decode_preprocess.cu | 3 +- cpp/src/io/parquet/page_data.cu | 3 +- cpp/src/io/parquet/page_decode.cuh | 22 +++++++++------ cpp/src/io/parquet/page_delta_decode.cu | 20 +++++++++---- cpp/src/io/parquet/page_string_decode.cu | 36 ++++++++++++++++++------ 5 files changed, 59 insertions(+), 25 deletions(-) diff --git a/cpp/src/io/parquet/decode_preprocess.cu b/cpp/src/io/parquet/decode_preprocess.cu index ad847b3eba5..afe9a76a6d0 100644 --- a/cpp/src/io/parquet/decode_preprocess.cu +++ b/cpp/src/io/parquet/decode_preprocess.cu @@ -232,7 +232,8 @@ __global__ void __launch_bounds__(preprocess_block_size) {rep_runs}}; // setup page info - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, all_types_filter{}, false, false)) { + if (!setupLocalPageInfo( + s, pp, chunks, min_row, num_rows, all_types_filter{}, page_processing_stage::PREPROCESS)) { return; } diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index fdfc92b4126..d39edd70fcd 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -446,8 +446,7 @@ __global__ void __launch_bounds__(decode_block_size) min_row, num_rows, mask_filter{decode_kernel_mask::GENERAL}, - true, - false)) { + page_processing_stage::DECODE)) { return; } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index ff6a68e26df..28341476728 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1014,6 +1014,12 @@ struct mask_filter { } }; +enum class page_processing_stage { + PREPROCESS, + STRING_BOUNDS, + DECODE, +}; + /** * @brief Sets up block-local page state information from the global pages. * @@ -1036,8 +1042,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, size_t min_row, size_t num_rows, Filter filter, - bool is_decode_step, - bool is_bounds_step) + page_processing_stage stage) { int t = threadIdx.x; @@ -1128,7 +1133,8 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // // NOTE: this check needs to be done after the null counts have been zeroed out bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; - if ((is_decode_step || is_bounds_step) && s->num_rows == 0 && + if ((stage == page_processing_stage::STRING_BOUNDS || stage == page_processing_stage::DECODE) && + s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows, has_repetition) || is_page_contained(s, min_row, num_rows)))) { return false; @@ -1239,7 +1245,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // NOTE: in a chunked read situation, s->col.column_data_base and s->col.valid_map_base // will be aliased to memory that has been freed when we get here in the non-decode step, so // we cannot check against nullptr. we'll just check a flag directly. - if (is_decode_step) { + if (stage == page_processing_stage::DECODE) { int max_depth = s->col.max_nesting_depth; for (int idx = 0; idx < max_depth; idx++) { PageNestingDecodeInfo* nesting_info = &s->nesting_info[idx]; @@ -1389,13 +1395,13 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // if we're in the decoding step, jump directly to the first // value we care about - if (is_decode_step || is_bounds_step) { + if (stage == page_processing_stage::DECODE) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; - } else { + } else if (stage == page_processing_stage::PREPROCESS) { s->input_value_count = 0; s->input_leaf_count = 0; - s->page.skipped_values = - -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes + // magic number to indicate it hasn't been set for use inside UpdatePageSizes + s->page.skipped_values = -1; s->page.skipped_leaf_values = 0; } } diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 74917a0fe56..98f8fbb09a2 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -323,9 +323,13 @@ __global__ void __launch_bounds__(96) auto* const db = &db_state; [[maybe_unused]] null_count_back_copier _{s, t}; - auto const mask = decode_kernel_mask::DELTA_BINARY; - if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { + if (!setupLocalPageInfo(s, + &pages[page_idx], + chunks, + min_row, + num_rows, + mask_filter{decode_kernel_mask::DELTA_BINARY}, + page_processing_stage::DECODE)) { return; } @@ -446,9 +450,13 @@ __global__ void __launch_bounds__(decode_block_size) auto* const dba = &db_state; [[maybe_unused]] null_count_back_copier _{s, t}; - auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; - if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { + if (!setupLocalPageInfo(s, + &pages[page_idx], + chunks, + min_row, + num_rows, + mask_filter{decode_kernel_mask::DELTA_BYTE_ARRAY}, + page_processing_stage::DECODE)) { return; } diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index c7e329a47d2..ef2e7ef42ef 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -616,7 +616,13 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou // setup page info auto const mask = BitOr(decode_kernel_mask::STRING, decode_kernel_mask::DELTA_BYTE_ARRAY); - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { + if (!setupLocalPageInfo(s, + pp, + chunks, + min_row, + num_rows, + mask_filter{mask}, + page_processing_stage::STRING_BOUNDS)) { return; } @@ -661,8 +667,13 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; // setup page info - auto const mask = decode_kernel_mask::DELTA_BYTE_ARRAY; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, mask_filter{mask}, false, true)) { + if (!setupLocalPageInfo(s, + pp, + chunks, + min_row, + num_rows, + mask_filter{decode_kernel_mask::DELTA_BYTE_ARRAY}, + page_processing_stage::STRING_BOUNDS)) { return; } @@ -726,8 +737,13 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; // setup page info - if (!setupLocalPageInfo( - s, pp, chunks, min_row, num_rows, mask_filter{decode_kernel_mask::STRING}, false, true)) { + if (!setupLocalPageInfo(s, + pp, + chunks, + min_row, + num_rows, + mask_filter{decode_kernel_mask::STRING}, + page_processing_stage::STRING_BOUNDS)) { return; } @@ -820,9 +836,13 @@ __global__ void __launch_bounds__(decode_block_size) int const lane_id = t % warp_size; [[maybe_unused]] null_count_back_copier _{s, t}; - auto const mask = decode_kernel_mask::STRING; - if (!setupLocalPageInfo( - s, &pages[page_idx], chunks, min_row, num_rows, mask_filter{mask}, true, false)) { + if (!setupLocalPageInfo(s, + &pages[page_idx], + chunks, + min_row, + num_rows, + mask_filter{decode_kernel_mask::STRING}, + page_processing_stage::DECODE)) { return; } From 80d0c667cdc05a20f36832d48a192a508461ee9b Mon Sep 17 00:00:00 2001 From: seidl Date: Mon, 18 Dec 2023 16:29:18 -0800 Subject: [PATCH 8/8] fix docstring --- cpp/src/io/parquet/page_decode.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 28341476728..f6f2f9e9f18 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1029,8 +1029,7 @@ enum class page_processing_stage { * @param[in] min_row Crop all rows below min_row * @param[in] num_rows Maximum number of rows to read * @param[in] filter Filtering function used to decide which pages to operate on - * @param[in] is_decode_step If we are setting up for the decode step (instead of the preprocess) - * @param[in] is_bounds_step If we are in the string bounds checking step + * @param[in] stage What stage of the decoding process is this being called from * @tparam Filter Function that takes a PageInfo reference and returns true if the given page should * be operated on Currently only used by gpuComputePageSizes step) * @return True if this page should be processed further