Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow ORC and Parquet writers to write nullable columns without nulls as non-nullable #13675

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -687,9 +687,9 @@ orc_streams create_streams(host_span<orc_column_view> columns,
// For chunked write, when not provided nullability, we assume the worst case scenario
// that all columns are nullable.
auto const chunked_nullable = column.user_defined_nullable().value_or(true);
CUDF_EXPECTS(chunked_nullable or !column.nullable(),
"Mismatch in metadata prescribed nullability and input column nullability. "
"Metadata for nullable input column cannot prescribe nullability = false");
CUDF_EXPECTS(chunked_nullable or column.null_count() == 0,
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
"Mismatch in metadata prescribed nullability and input column. "
"Metadata for input column with nulls cannot prescribe nullability = false");
return chunked_nullable;
}
}();
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -515,9 +515,9 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col,
single_write_mode write_mode)
{
if (col_meta.is_nullability_defined()) {
CUDF_EXPECTS(col_meta.nullable() || !col->nullable(),
"Mismatch in metadata prescribed nullability and input column nullability. "
"Metadata for nullable input column cannot prescribe nullability = false");
CUDF_EXPECTS(col_meta.nullable() or col->null_count() == 0,
"Mismatch in metadata prescribed nullability and input column. "
"Metadata for input column with nulls cannot prescribe nullability = false");
return col_meta.nullable();
}
// For chunked write, when not provided nullability, we assume the worst case scenario
Expand Down
17 changes: 17 additions & 0 deletions cpp/tests/io/orc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1806,4 +1806,21 @@ TEST_F(OrcWriterTest, EmptyRowGroup)
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
}

TEST_F(OrcWriterTest, NoNullsAsNonNullable)
{
auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
column_wrapper<int32_t> col{{1, 2, 3}, valids};
table_view expected({col});

cudf::io::table_input_metadata expected_metadata(expected);
expected_metadata.column_metadata[0].set_nullability(false);

auto filepath = temp_env->get_temp_filepath("NonNullable.orc");
cudf::io::orc_writer_options out_opts =
cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
.metadata(&expected_metadata);
// Writer should be able to write a column without nulls as non-nullable
EXPECT_NO_THROW(cudf::io::write_orc(out_opts));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a nit, either way is fine, but I don't think EXPECT_NO_THROW is required. If it throws and the exception is not caught, the test fails anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use it just to make it easier to see what we're checking. I prefer to have the macro for this reason.

}

CUDF_TEST_PROGRAM_MAIN()
17 changes: 17 additions & 0 deletions cpp/tests/io/parquet_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5600,4 +5600,21 @@ TEST_F(ParquetReaderTest, ReorderedReadMultipleFiles)
CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[1], swapped2);
}

TEST_F(ParquetWriterTest, NoNullsAsNonNullable)
{
auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
column_wrapper<int32_t> col{{1, 2, 3}, valids};
table_view expected({col});

cudf::io::table_input_metadata expected_metadata(expected);
expected_metadata.column_metadata[0].set_nullability(false);

auto filepath = temp_env->get_temp_filepath("NonNullable.parquet");
cudf::io::parquet_writer_options out_opts =
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
.metadata(&expected_metadata);
// Writer should be able to write a column without nulls as non-nullable
EXPECT_NO_THROW(cudf::io::write_parquet(out_opts));
}

CUDF_TEST_PROGRAM_MAIN()