From c82a70807849188274d21b595d5ded818aad4464 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 6 Sep 2023 10:57:10 +0800 Subject: [PATCH] Fix map column can not be non-nullable for java (#14003) Make map column non-nullable for java. Changes: - Add a new method to pass nullable; Deprecate the old one. - Update the tests. Authors: - Chong Gao (https://github.com/res-life) - Nghia Truong (https://github.com/ttnghia) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/14003 --- .../ai/rapids/cudf/ColumnWriterOptions.java | 30 +++++++++++++++++++ .../test/java/ai/rapids/cudf/TableTest.java | 6 ++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 2177f58c9de..a95c5f58f09 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -522,7 +522,11 @@ protected String[] getFlatColumnNames(String[] ret) { * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. + * + * Note: This method always returns a nullable column, it cannot return a non-nullable column. + * Do not use this, use the next function with the parameter `isNullable`. */ + @Deprecated public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, ColumnWriterOptions value) { StructColumnWriterOptions struct = structBuilder("key_value").build(); @@ -537,6 +541,32 @@ public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key return opt; } + /** + * Add a Map Column to the schema. + *

+ * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child + * named 'value'. The caller of this method doesn't need to worry about this as this method will + * take care of this without the knowledge of the caller. + * + * Note: If this map column is a key of another map, should pass isNullable = false. + * e.g.: map1(map2(int, int), int) the map2 should be non-nullable. + * + * @param isNullable is the returned map nullable. + */ + public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, + ColumnWriterOptions value, Boolean isNullable) { + if (key.isNullable) { + throw new IllegalArgumentException("key column can not be nullable"); + } + StructColumnWriterOptions struct = structBuilder("key_value").build(); + struct.childColumnOptions = new ColumnWriterOptions[]{key, value}; + ColumnWriterOptions opt = listBuilder(name, isNullable) + .withStructColumn(struct) + .build(); + opt.isMap = true; + return opt; + } + /** * Creates a ListBuilder for column called 'name' */ diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 5c0c738a20f..3740328615a 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -8064,7 +8064,8 @@ void testParquetWriteMap() throws IOException { ParquetWriterOptions options = ParquetWriterOptions.builder() .withMapColumn(mapColumn("my_map", new ColumnWriterOptions("key0", false), - new ColumnWriterOptions("value0"))).build(); + new ColumnWriterOptions("value0"), + true)).build(); File f = File.createTempFile("test-map", ".parquet"); List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b"))); @@ -8562,7 +8563,8 @@ void testORCWriteMapChunked() throws IOException { ORCWriterOptions options = ORCWriterOptions.builder() .withMapColumn(mapColumn("my_map", new ColumnWriterOptions("key0", false), - new 
ColumnWriterOptions("value0"))).build(); + new ColumnWriterOptions("value0"), + true)).build(); File f = File.createTempFile("test-map", ".parquet"); List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b")));