From e5dd28d4a8872fb3071ccbe892599b8faaeaca5e Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 7 Jul 2023 14:56:17 -0700 Subject: [PATCH 1/3] relax the nullability check --- cpp/src/io/orc/writer_impl.cu | 6 +++--- cpp/src/io/parquet/writer_impl.cu | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 1d8c946675f..48bb1004ac7 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -687,9 +687,9 @@ orc_streams create_streams(host_span columns, // For chunked write, when not provided nullability, we assume the worst case scenario // that all columns are nullable. auto const chunked_nullable = column.user_defined_nullable().value_or(true); - CUDF_EXPECTS(chunked_nullable or !column.nullable(), - "Mismatch in metadata prescribed nullability and input column nullability. " - "Metadata for nullable input column cannot prescribe nullability = false"); + CUDF_EXPECTS(chunked_nullable or column.null_count() == 0, + "Mismatch in metadata prescribed nullability and input column. " + "Metadata for input column with nulls cannot prescribe nullability = false"); return chunked_nullable; } }(); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 9487125657d..17a0a903a47 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -515,9 +515,9 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, single_write_mode write_mode) { if (col_meta.is_nullability_defined()) { - CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), - "Mismatch in metadata prescribed nullability and input column nullability. " - "Metadata for nullable input column cannot prescribe nullability = false"); + CUDF_EXPECTS(col_meta.nullable() or col->null_count() == 0, + "Mismatch in metadata prescribed nullability and input column. " + "Metadata for input column with nulls cannot prescribe nullability = false"); return col_meta.nullable(); } // For chunked write, when not provided nullability, we assume the worst case scenario From 8e60bd2cf0a6db09aa8d243b06cfbeb80ab2e6a0 Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 7 Jul 2023 14:56:24 -0700 Subject: [PATCH 2/3] tests --- cpp/tests/io/orc_test.cpp | 17 +++++++++++++++++ cpp/tests/io/parquet_test.cpp | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 82b8b6c4232..c83e16f7369 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1806,4 +1806,21 @@ TEST_F(OrcWriterTest, EmptyRowGroup) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } +TEST_F(OrcWriterTest, NoNullsAsNonNullable) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{{1, 2, 3}, valids}; + table_view expected({col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + // Writer should be able to write a column without nulls as non-nullable + EXPECT_NO_THROW(cudf::io::write_orc(out_opts)); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 2c8e4d4efd2..d8f1d985f59 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -5600,4 +5600,21 @@ TEST_F(ParquetReaderTest, ReorderedReadMultipleFiles) CUDF_TEST_EXPECT_TABLES_EQUAL(sliced[1], swapped2); } +TEST_F(ParquetWriterTest, NoNullsAsNonNullable) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{{1, 2, 3}, valids}; + table_view expected({col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + // Writer should be able to write a column without nulls as non-nullable + EXPECT_NO_THROW(cudf::io::write_parquet(out_opts)); +} + CUDF_TEST_PROGRAM_MAIN() From 5f5f568038886a66ae38372ccbc65ba10bae6a4b Mon Sep 17 00:00:00 2001 From: vuule Date: Tue, 11 Jul 2023 16:07:52 -0700 Subject: [PATCH 3/3] update due to breaking change --- cpp/tests/io/orc_test.cpp | 2 +- cpp/tests/io/parquet_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index c0fea1fa402..95c8e4802a2 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1818,7 +1818,7 @@ TEST_F(OrcWriterTest, NoNullsAsNonNullable) auto filepath = temp_env->get_temp_filepath("NonNullable.orc"); cudf::io::orc_writer_options out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); + .metadata(std::move(expected_metadata)); // Writer should be able to write a column without nulls as non-nullable EXPECT_NO_THROW(cudf::io::write_orc(out_opts)); } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index f2335ee4c21..ff8d308318a 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -5613,7 +5613,7 @@ TEST_F(ParquetWriterTest, NoNullsAsNonNullable) auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); cudf::io::parquet_writer_options out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); + .metadata(std::move(expected_metadata)); // Writer should be able to write a column without nulls as non-nullable EXPECT_NO_THROW(cudf::io::write_parquet(out_opts)); }