From 829b3a959cc5f0d41fe51dca9a4335dba0da69a5 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 13 Jun 2024 20:40:56 -0700 Subject: [PATCH] Fix the int32 overflow when computing page fragment sizes for large string columns (#16028) This PR fixes the possible `int32` overflow when computing page fragment sizes for large (2B+ char) string columns. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16028 --- cpp/src/io/parquet/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 6d466748c17..ca15b532d07 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1763,10 +1763,10 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, // for multiple fragments per page to smooth things out. using 2 was too // unbalanced in final page sizes, so using 4 which seems to be a good // compromise at smoothing things out without getting fragment sizes too small. - auto frag_size_fn = [&](auto const& col, size_type col_size) { + auto frag_size_fn = [&](auto const& col, size_t col_size) { int const target_frags_per_page = is_col_fixed_width(col) ? 1 : 4; auto const avg_len = - target_frags_per_page * util::div_rounding_up_safe<size_type>(col_size, input.num_rows()); + target_frags_per_page * util::div_rounding_up_safe<size_t>(col_size, input.num_rows()); if (avg_len > 0) { auto const frag_size = util::div_rounding_up_safe<size_type>(max_page_size_bytes, avg_len); return std::min<size_type>(max_page_fragment_size, frag_size);