Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Parquet writer nullable option application to single table writes #12933

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cpp/include/cudf/io/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,6 @@ class column_in_metadata {
/**
* @brief Set the nullability of this column
*
* Only valid in case of chunked writes. In single writes, this option is ignored.
*
* @param nullable Whether this column is nullable
* @return this for chaining
*/
Expand Down
23 changes: 9 additions & 14 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -509,20 +509,15 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col,
column_in_metadata const& col_meta,
bool single_write_mode)
{
if (single_write_mode) {
return col->nullable();
} else {
if (col_meta.is_nullability_defined()) {
CUDF_EXPECTS(col_meta.nullable() || !col->nullable(),
"Mismatch in metadata prescribed nullability and input column nullability. "
"Metadata for nullable input column cannot prescribe nullability = false");
return col_meta.nullable();
} else {
// For chunked write, when not provided nullability, we assume the worst case scenario
// that all columns are nullable.
return true;
}
}
if (col_meta.is_nullability_defined()) {
CUDF_EXPECTS(col_meta.nullable() || !col->nullable(),
"Mismatch in metadata prescribed nullability and input column nullability. "
"Metadata for nullable input column cannot prescribe nullability = false");
return col_meta.nullable();
}
// For chunked write, when not provided nullability, we assume the worst case scenario
// that all columns are nullable.
return not single_write_mode or col->nullable();
}

/**
Expand Down
47 changes: 47 additions & 0 deletions cpp/tests/io/parquet_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5210,4 +5210,51 @@ TYPED_TEST(ParquetReaderSourceTest, BufferSourceArrayTypes)
}
}

TEST_F(ParquetWriterTest, UserNullability)
{
auto weight_col = cudf::test::fixed_width_column_wrapper<float>{{57.5, 51.1, 15.3}};
auto ages_col = cudf::test::fixed_width_column_wrapper<int32_t>{{30, 27, 5}};
auto struct_col = cudf::test::structs_column_wrapper{weight_col, ages_col};

auto expected = table_view({struct_col});

cudf::io::table_input_metadata expected_metadata(expected);
expected_metadata.column_metadata[0].set_nullability(false);
expected_metadata.column_metadata[0].child(0).set_nullability(true);

auto filepath = temp_env->get_temp_filepath("SingleWriteNullable.parquet");
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
.metadata(&expected_metadata);
cudf::io::write_parquet(write_opts);

cudf::io::parquet_reader_options read_opts =
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
auto result = cudf::io::read_parquet(read_opts);

EXPECT_FALSE(result.tbl->view().column(0).nullable());
EXPECT_TRUE(result.tbl->view().column(0).child(0).nullable());
EXPECT_FALSE(result.tbl->view().column(0).child(1).nullable());
}

TEST_F(ParquetWriterTest, UserNullabilityInvalid)
{
auto valids =
cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; });
auto col = cudf::test::fixed_width_column_wrapper<double>{{57.5, 51.1, 15.3}, valids};
auto expected = table_view({col});

auto filepath = temp_env->get_temp_filepath("SingleWriteNullableInvalid.parquet");
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
// Should work without the nullability option
EXPECT_NO_THROW(cudf::io::write_parquet(write_opts));

cudf::io::table_input_metadata expected_metadata(expected);
expected_metadata.column_metadata[0].set_nullability(false);
write_opts.set_metadata(&expected_metadata);
// Can't write a column with nulls as not nullable
EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error);
}

CUDF_TEST_PROGRAM_MAIN()