Skip to content

Commit

Permalink
Fix for Parquet writer when requested pages per row is smaller than f…
Browse files Browse the repository at this point in the history
…ragment size (#13806)

#12685 introduced a bug in page calculation. If the `max_page_size_rows` parameter is set smaller than the page fragment size, the writer will produce a spurious empty page. This PR fixes this by only checking the fragment size if there are already rows in the page, and then returns the old check for number of rows exceeding the page limit.

Interestingly, libcudf can read these files with empty pages just fine, but parquet-mr cannot.

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #13806
  • Loading branch information
etseidl authored Aug 3, 2023
1 parent 11fd25c commit 399efb9
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 3 deletions.
12 changes: 9 additions & 3 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,15 @@ __global__ void __launch_bounds__(128)
max_RLE_page_size(col_g.num_def_level_bits(), num_vals) +
max_RLE_page_size(col_g.num_rep_level_bits(), num_vals));

if (num_rows >= ck_g.num_rows ||
(values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) ||
rows_in_page + frag_g.num_rows > max_page_size_rows) {
// checks to see when we need to close the current page and start a new one
auto const is_last_chunk = num_rows >= ck_g.num_rows;
auto const is_page_bytes_exceeded = page_size + fragment_data_size > this_max_page_size;
auto const is_page_rows_exceeded = rows_in_page + frag_g.num_rows > max_page_size_rows;
// only check for limit overflow if there's already at least one fragment for this page
auto const is_page_too_big =
values_in_page > 0 && (is_page_bytes_exceeded || is_page_rows_exceeded);

if (is_last_chunk || is_page_too_big) {
if (ck_g.use_dictionary) {
// Additional byte to store entry bit width
page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page);
Expand Down
38 changes: 38 additions & 0 deletions cpp/tests/io/parquet_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3709,6 +3709,44 @@ TEST_F(ParquetWriterTest, CheckPageRowsAdjusted)
EXPECT_LE(ph.data_page_header.num_values, rows_per_page);
}

TEST_F(ParquetWriterTest, CheckPageRowsTooSmall)
{
constexpr auto rows_per_page = 1'000;
constexpr auto fragment_size = 5'000;
constexpr auto num_rows = 3 * rows_per_page;
const std::string s1(32, 'a');
auto col0_elements =
cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; });
auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows);

auto const expected = table_view{{col0}};

auto const filepath = temp_env->get_temp_filepath("CheckPageRowsTooSmall.parquet");
const cudf::io::parquet_writer_options out_opts =
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
.max_page_fragment_size(fragment_size)
.max_page_size_rows(rows_per_page);
cudf::io::write_parquet(out_opts);

// check that file is written correctly when rows/page < fragment size
auto const source = cudf::io::datasource::create(filepath);
cudf::io::parquet::FileMetaData fmd;

read_footer(source, &fmd);
ASSERT_TRUE(fmd.row_groups.size() > 0);
ASSERT_TRUE(fmd.row_groups[0].columns.size() == 1);
auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data;
ASSERT_TRUE(first_chunk.data_page_offset > 0);

// read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded
// version should be smaller than size of the struct.
auto const ph = read_page_header(
source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0});

// there should be only one page since the fragment size is larger than rows_per_page
EXPECT_EQ(ph.data_page_header.num_values, num_rows);
}

TEST_F(ParquetWriterTest, Decimal128Stats)
{
// check that decimal128 min and max statistics are written in network byte order
Expand Down

0 comments on commit 399efb9

Please sign in to comment.