From 154c29d4f80ddf99040948e520db97220f984c1d Mon Sep 17 00:00:00 2001 From: zhangyiqun Date: Tue, 22 Aug 2023 23:58:50 +0800 Subject: [PATCH 1/4] ORC-1489: Assign a writer id to CUDF --- c++/include/orc/Common.hh | 1 + c++/src/Common.cc | 2 ++ c++/src/Reader.cc | 2 +- java/core/src/java/org/apache/orc/OrcFile.java | 4 ++++ java/core/src/java/org/apache/orc/OrcUtils.java | 3 +++ java/core/src/test/org/apache/orc/TestVectorOrcFile.java | 6 ++++++ proto/orc_proto.proto | 1 + 7 files changed, 18 insertions(+), 1 deletion(-) diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh index beae9dd6f3..9da67a3f19 100644 --- a/c++/include/orc/Common.hh +++ b/c++/include/orc/Common.hh @@ -70,6 +70,7 @@ namespace orc { PRESTO_WRITER = 2, SCRITCHLEY_GO = 3, TRINO_WRITER = 4, + CUDF_WRITER = 5, UNKNOWN_WRITER = INT32_MAX }; diff --git a/c++/src/Common.cc b/c++/src/Common.cc index e220e274de..cf2ff27ef1 100644 --- a/c++/src/Common.cc +++ b/c++/src/Common.cc @@ -82,6 +82,8 @@ namespace orc { return "Scritchley Go"; case TRINO_WRITER: return "Trino"; + case CUDF_WRITER: + return "CUDF"; default: { std::ostringstream buffer; buffer << "Unknown(" << id << ")"; diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index b52675abb3..386793f0b2 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -622,7 +622,7 @@ namespace orc { WriterId ReaderImpl::getWriterId() const { if (footer->has_writer()) { uint32_t id = footer->writer(); - if (id > WriterId::TRINO_WRITER) { + if (id > WriterId::CUDF_WRITER) { return WriterId::UNKNOWN_WRITER; } else { return static_cast(id); diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java index fc164a9770..e41e79945e 100644 --- a/java/core/src/java/org/apache/orc/OrcFile.java +++ b/java/core/src/java/org/apache/orc/OrcFile.java @@ -132,6 +132,7 @@ public enum WriterImplementation { PRESTO(2), // Presto writer SCRITCHLEY_GO(3), // Go writer from https://github.com/scritchley/orc TRINO(4), // Trino writer + CUDF(5), // CUDF writer UNKNOWN(Integer.MAX_VALUE); private final int id; @@ -189,6 +190,9 @@ public enum WriterVersion { // Trino Writer TRINO_ORIGINAL(WriterImplementation.TRINO, 6), + // CUDF Writer + CUDF_ORIGINAL(WriterImplementation.CUDF, 6), + // Don't use any magic numbers here except for the below: FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java index c121537d3c..7dde0bc0fd 100644 --- a/java/core/src/java/org/apache/orc/OrcUtils.java +++ b/java/core/src/java/org/apache/orc/OrcUtils.java @@ -414,6 +414,9 @@ public static String getSoftwareVersion(int writer, case 4: base = "Trino"; break; + case 5: + base = "CUDF"; + break; default: base = String.format("Unknown(%d)", writer); break; diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index 9a1431c682..8eae7a7cde 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -3597,6 +3597,8 @@ public void testWriterVersion(Version fileFormat) throws Exception { OrcFile.WriterImplementation.from(2)); assertEquals(OrcFile.WriterImplementation.TRINO, OrcFile.WriterImplementation.from(4)); + assertEquals(OrcFile.WriterImplementation.CUDF, + OrcFile.WriterImplementation.from(5)); assertEquals(OrcFile.WriterImplementation.UNKNOWN, OrcFile.WriterImplementation.from(99)); @@ -3615,6 +3617,8 @@ public void testWriterVersion(Version fileFormat) throws Exception { OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6)); assertEquals(OrcFile.WriterVersion.TRINO_ORIGINAL, OrcFile.WriterVersion.from(OrcFile.WriterImplementation.TRINO, 6)); + assertEquals(OrcFile.WriterVersion.CUDF_ORIGINAL, + OrcFile.WriterVersion.from(OrcFile.WriterImplementation.CUDF, 6)); assertEquals(OrcFile.WriterVersion.FUTURE, OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0)); @@ -3633,6 +3637,8 @@ public void testWriterVersion(Version fileFormat) throws Exception { OrcFile.WriterVersion.PRESTO_ORIGINAL)); assertTrue(OrcFile.WriterVersion.HIVE_12055.includes( OrcFile.WriterVersion.TRINO_ORIGINAL)); + assertTrue(OrcFile.WriterVersion.HIVE_12055.includes( + OrcFile.WriterVersion.CUDF_ORIGINAL)); } @ParameterizedTest diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto index ff05657a54..b657c6ac80 100644 --- a/proto/orc_proto.proto +++ b/proto/orc_proto.proto @@ -367,6 +367,7 @@ message Footer { // 2 = Presto // 3 = Scritchley Go from https://github.com/scritchley/orc // 4 = Trino + // 5 = CUDF optional uint32 writer = 9; // information about the encryption in this file From 4146ae3624a4a29176c08192666e586bc3a560b6 Mon Sep 17 00:00:00 2001 From: Yiqun Zhang Date: Thu, 24 Aug 2023 11:08:22 +0800 Subject: [PATCH 2/4] ORC-1489: Assign a writer id to CUDF --- site/specification/ORCv1.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/specification/ORCv1.md b/site/specification/ORCv1.md index 28347642ec..c9c9311aab 100644 --- a/site/specification/ORCv1.md +++ b/site/specification/ORCv1.md @@ -136,6 +136,7 @@ message Footer { // 2 = Presto // 3 = Scritchley Go from https://github.com/scritchley/orc // 4 = Trino + // 5 = CUDF optional uint32 writer = 9; // information about the encryption in this file optional Encryption encryption = 10; From a05ced48aa195491bdba491941c1e7460c1dc846 Mon Sep 17 00:00:00 2001 From: Yiqun Zhang Date: Sun, 27 Aug 2023 15:18:41 +0800 Subject: [PATCH 3/4] ORC-1489: Assign a writer id to CUDF --- proto/orc_proto.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto index b657c6ac80..45d7d2a054 100644 --- a/proto/orc_proto.proto +++ b/proto/orc_proto.proto @@ -433,6 +433,9 @@ message PostScript { // Version of the Trino writer: // 6 = original // + // Version of the CUDF writer: + // 6 = original + // optional uint32 writerVersion = 6; // the number of bytes in the encrypted stripe statistics From 6ebac836b0e6a0c5ec2e5a03e096de18500abd43 Mon Sep 17 00:00:00 2001 From: zhangyiqun Date: Wed, 30 Aug 2023 09:45:17 +0800 Subject: [PATCH 4/4] ORC-1489: Assign a writer id to CUDF --- site/specification/ORCv2.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/specification/ORCv2.md b/site/specification/ORCv2.md index 010de73c97..62d640786d 100644 --- a/site/specification/ORCv2.md +++ b/site/specification/ORCv2.md @@ -156,6 +156,7 @@ message Footer { // 2 = Presto // 3 = Scritchley Go from https://github.com/scritchley/orc // 4 = Trino + // 5 = CUDF optional uint32 writer = 9; // information about the encryption in this file optional Encryption encryption = 10;