From 20c1090d1cf4cbc809fd820cc34e1e878294f560 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 30 Aug 2023 16:33:06 +0800 Subject: [PATCH 1/4] Make map column nullable Signed-off-by: Chong Gao --- .../ai/rapids/cudf/ColumnWriterOptions.java | 27 +++++++++++++++++++ .../test/java/ai/rapids/cudf/TableTest.java | 6 +++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 2177f58c9de..ce1e39dc6ef 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -522,7 +522,10 @@ protected String[] getFlatColumnNames(String[] ret) { * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. + * Note: this method always returns a nullabe column, can not return non-nullable column. + * Do not use this, use the next function with the parameter `isNullable`. */ + @Deprecated public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, ColumnWriterOptions value) { StructColumnWriterOptions struct = structBuilder("key_value").build(); @@ -537,6 +540,30 @@ public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key return opt; } + /** + * Add a Map Column to the schema. + *

+ * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child + * named 'value'. The caller of this method doesn't need to worry about this as this method will + * take care of this without the knowledge of the caller. + * @param isNullable is the returned map nullable. + * Note: If this map column is a key of another map, should pass isNullable = false. + * e.g.: map1(map2(int, int), int) the map2 should be non-nullable. + */ + public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, + ColumnWriterOptions value, Boolean isNullable) { + StructColumnWriterOptions struct = structBuilder("key_value").build(); + if (key.isNullable) { + throw new IllegalArgumentException("key column can not be nullable"); + } + struct.childColumnOptions = new ColumnWriterOptions[]{key, value}; + ColumnWriterOptions opt = listBuilder(name, isNullable) + .withStructColumn(struct) + .build(); + opt.isMap = true; + return opt; + } + /** * Creates a ListBuilder for column called 'name' */ diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 94de3c6a11c..215e4aae775 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -8061,7 +8061,8 @@ void testParquetWriteMap() throws IOException { ParquetWriterOptions options = ParquetWriterOptions.builder() .withMapColumn(mapColumn("my_map", new ColumnWriterOptions("key0", false), - new ColumnWriterOptions("value0"))).build(); + new ColumnWriterOptions("value0"), + true)).build(); File f = File.createTempFile("test-map", ".parquet"); List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b"))); @@ -8559,7 +8560,8 @@ void testORCWriteMapChunked() throws IOException { ORCWriterOptions options = ORCWriterOptions.builder() .withMapColumn(mapColumn("my_map", new ColumnWriterOptions("key0", false), - new ColumnWriterOptions("value0"))).build(); + new 
ColumnWriterOptions("value0"), + true)).build(); File f = File.createTempFile("test-map", ".parquet"); List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b"))); From fb29377c66686b9587b3457639c38ed5fea75ffe Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 31 Aug 2023 10:08:25 +0800 Subject: [PATCH 2/4] Refactor --- java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index ce1e39dc6ef..846889b8686 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -546,16 +546,18 @@ public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. - * @param isNullable is the returned map nullable. + * * Note: If this map column is a key of another map, should pass isNullable = false. * e.g.: map1(map2(int, int), int) the map2 should be non-nullable. + * + * @param isNullable is the returned map nullable. 
*/ public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, ColumnWriterOptions value, Boolean isNullable) { - StructColumnWriterOptions struct = structBuilder("key_value").build(); if (key.isNullable) { throw new IllegalArgumentException("key column can not be nullable"); } + StructColumnWriterOptions struct = structBuilder("key_value").build(); struct.childColumnOptions = new ColumnWriterOptions[]{key, value}; ColumnWriterOptions opt = listBuilder(name, isNullable) .withStructColumn(struct) From 5b49e088c9de639875a8a7d689855bddd81f2879 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Mon, 4 Sep 2023 11:51:59 -0600 Subject: [PATCH 3/4] Update java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java --- java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 846889b8686..cfac07ee397 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -522,6 +522,7 @@ protected String[] getFlatColumnNames(String[] ret) { * Maps are List columns with a Struct named 'key_value' with a child named 'key' and a child * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. + * * Note: this method always returns a nullabe column, can not return non-nullable column. * Do not use this, use the next function with the parameter `isNullable`. 
*/ From 6c402ff61ca6bd038d55e125acd24d4a29e8e757 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Mon, 4 Sep 2023 11:52:51 -0600 Subject: [PATCH 4/4] Update java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java --- java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index cfac07ee397..a95c5f58f09 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -523,7 +523,7 @@ protected String[] getFlatColumnNames(String[] ret) { * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. * - * Note: this method always returns a nullabe column, can not return non-nullable column. + * Note: This method always returns a nullable column, cannot return non-nullable column. * Do not use this, use the next function with the parameter `isNullable`. */ @Deprecated