From 43c20d6970d82c5889e9761faa74af89cb5fc7d1 Mon Sep 17 00:00:00 2001 From: Jack Klamer Date: Wed, 10 Jan 2024 11:00:46 -0600 Subject: [PATCH] HIVE/AVRO: Handle all union options coercion to single type --- .../hive/formats/avro/AvroPageDataReader.java | 7 ++-- ...tAvroPageDataReaderWithoutTypeManager.java | 33 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java index 365f421ed7cc..527ea0ae1ffa 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java @@ -193,11 +193,12 @@ private static BlockBuildingDecoder createBlockBuildingDecoderForAction(Resolver case RECORD -> new RowBlockBuildingDecoder(action, typeManager); case ENUM -> new EnumBlockBuildingDecoder((Resolver.EnumAdjust) action); case WRITER_UNION -> { - if (isSimpleNullableUnion(action.reader)) { - yield new WriterUnionBlockBuildingDecoder((Resolver.WriterUnion) action, typeManager); + if (action.reader.getType() == Schema.Type.UNION && !isSimpleNullableUnion(action.reader)) { + yield new WriterUnionCoercedIntoRowBlockBuildingDecoder((Resolver.WriterUnion) action, typeManager); } else { - yield new WriterUnionCoercedIntoRowBlockBuildingDecoder((Resolver.WriterUnion) action, typeManager); + // reading a union with non-union or nullable union, optimistically try to create the reader, will fail at read time with any underlying issues + yield new WriterUnionBlockBuildingDecoder((Resolver.WriterUnion) action, typeManager); } } case READER_UNION -> { diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java 
b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java index 2234c0a3bed4..123c9d29e0c9 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java @@ -370,4 +370,37 @@ public void testRead3UnionWith2UnionDataWith2Union() } } } + + @Test + public void testReadUnionWithNonUnionAllCoercions() + throws IOException, AvroTypeException + { + Schema nonUnion = Schema.create(Schema.Type.STRING); + Schema union = Schema.createUnion(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.BYTES)); + + Schema nonUnionRecord = SchemaBuilder.builder() + .record("aRecord") + .fields() + .name("aField") + .type(nonUnion) + .noDefault() + .endRecord(); + + Schema unionRecord = SchemaBuilder.builder() + .record("aRecord") + .fields() + .name("aField") + .type(union) + .noDefault() + .endRecord(); + + TrinoInputFile inputFile = createWrittenFileWithSchema(1000, unionRecord); + + //read the file with the non-union schema and ensure that no error thrown + try (AvroFileReader avroFileReader = new AvroFileReader(inputFile, nonUnionRecord, NoOpAvroTypeManager.INSTANCE)) { + while (avroFileReader.hasNext()) { + assertThat(avroFileReader.next()).isNotNull(); + } + } + } }