From c62ec8a902dc85f41152538793f07b61cb6c84d7 Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 17 Feb 2023 19:51:06 -0800 Subject: [PATCH 1/4] allow nullable to apply to single writes --- cpp/src/io/parquet/writer_impl.cu | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 13ec2d652a6..0c2b579d077 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -469,20 +469,15 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, bool single_write_mode) { - if (single_write_mode) { - return col->nullable(); - } else { - if (col_meta.is_nullability_defined()) { - CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), - "Mismatch in metadata prescribed nullability and input column nullability. " - "Metadata for nullable input column cannot prescribe nullability = false"); - return col_meta.nullable(); - } else { - // For chunked write, when not provided nullability, we assume the worst case scenario - // that all columns are nullable. - return true; - } - } + if (col_meta.is_nullability_defined()) { + CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), + "Mismatch in metadata prescribed nullability and input column nullability. " + "Metadata for nullable input column cannot prescribe nullability = false"); + return col_meta.nullable(); + } + // For chunked write, when not provided nullability, we assume the worst case scenario + // that all columns are nullable. + return single_write_mode || col->nullable(); } /** From c35beb350de211f109d775a27fcf8c28a18c264f Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 10 Mar 2023 17:10:39 -0800 Subject: [PATCH 2/4] fix --- cpp/src/io/parquet/writer_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index b08ee2588f4..5f407b5e774 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -517,7 +517,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, } // For chunked write, when not provided nullability, we assume the worst case scenario // that all columns are nullable. - return single_write_mode || col->nullable(); + return not single_write_mode or col->nullable(); } /** From 69481e8dd0e794decd9e95ac4f9ed2d49cbb10f7 Mon Sep 17 00:00:00 2001 From: vuule Date: Mon, 13 Mar 2023 13:24:49 -0700 Subject: [PATCH 3/4] tests --- cpp/tests/io/parquet_test.cpp | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 141c06733a6..42ce66ce58b 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -5225,4 +5225,51 @@ TYPED_TEST(ParquetReaderSourceTest, BufferSourceArrayTypes) } } +TEST_F(ParquetWriterTest, UserNullability) +{ + auto weight_col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_col = cudf::test::structs_column_wrapper{weight_col, ages_col}; + + auto expected = table_view({struct_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + expected_metadata.column_metadata[0].child(0).set_nullability(true); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullable.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(write_opts); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_FALSE(result.tbl->view().column(0).nullable()); + EXPECT_TRUE(result.tbl->view().column(0).child(0).nullable()); + EXPECT_FALSE(result.tbl->view().column(0).child(1).nullable()); +} + +TEST_F(ParquetWriterTest, UserNullabilityInvalid) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}, valids}; + auto expected = table_view({col}); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullableInvalid.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + // Should work without the nullability option + EXPECT_NO_THROW(cudf::io::write_parquet(write_opts)); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + write_opts.set_metadata(&expected_metadata); + // Can't write a column with nulls as not nullable + EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error); +} + CUDF_TEST_PROGRAM_MAIN() From 4b6305c54e30de7eef5dabfa6dada827a418d222 Mon Sep 17 00:00:00 2001 From: vuule Date: Mon, 13 Mar 2023 13:34:30 -0700 Subject: [PATCH 4/4] docs --- cpp/include/cudf/io/types.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 6f97eb768d9..7426811a18d 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -519,8 +519,6 @@ class column_in_metadata { /** * @brief Set the nullability of this column * - * Only valid in case of chunked writes. In single writes, this option is ignored. - * * @param nullable Whether this column is nullable * @return this for chaining */