Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-1489: Assign a writer id to CUDF #1594

Closed
guiyanakuang wants to merge 4 commits
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c++/include/orc/Common.hh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ namespace orc {
PRESTO_WRITER = 2,
SCRITCHLEY_GO = 3,
TRINO_WRITER = 4,
CUDF_WRITER = 5,
UNKNOWN_WRITER = INT32_MAX
};

Expand Down
2 changes: 2 additions & 0 deletions c++/src/Common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ namespace orc {
return "Scritchley Go";
case TRINO_WRITER:
return "Trino";
case CUDF_WRITER:
return "CUDF";
default: {
std::ostringstream buffer;
buffer << "Unknown(" << id << ")";
Expand Down
2 changes: 1 addition & 1 deletion c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ namespace orc {
WriterId ReaderImpl::getWriterId() const {
if (footer->has_writer()) {
uint32_t id = footer->writer();
- if (id > WriterId::TRINO_WRITER) {
+ if (id > WriterId::CUDF_WRITER) {
return WriterId::UNKNOWN_WRITER;
} else {
return static_cast<WriterId>(id);
Expand Down
4 changes: 4 additions & 0 deletions java/core/src/java/org/apache/orc/OrcFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public enum WriterImplementation {
PRESTO(2), // Presto writer
SCRITCHLEY_GO(3), // Go writer from https://github.com/scritchley/orc
TRINO(4), // Trino writer
CUDF(5), // CUDF writer
UNKNOWN(Integer.MAX_VALUE);

private final int id;
Expand Down Expand Up @@ -189,6 +190,9 @@ public enum WriterVersion {
// Trino Writer
TRINO_ORIGINAL(WriterImplementation.TRINO, 6),

// CUDF Writer
CUDF_ORIGINAL(WriterImplementation.CUDF, 6),

// Don't use any magic numbers here except for the below:
FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer

Expand Down
3 changes: 3 additions & 0 deletions java/core/src/java/org/apache/orc/OrcUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,9 @@ public static String getSoftwareVersion(int writer,
case 4:
base = "Trino";
break;
case 5:
base = "CUDF";
break;
default:
base = String.format("Unknown(%d)", writer);
break;
Expand Down
6 changes: 6 additions & 0 deletions java/core/src/test/org/apache/orc/TestVectorOrcFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -3597,6 +3597,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterImplementation.from(2));
assertEquals(OrcFile.WriterImplementation.TRINO,
OrcFile.WriterImplementation.from(4));
assertEquals(OrcFile.WriterImplementation.CUDF,
OrcFile.WriterImplementation.from(5));
assertEquals(OrcFile.WriterImplementation.UNKNOWN,
OrcFile.WriterImplementation.from(99));

Expand All @@ -3615,6 +3617,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
assertEquals(OrcFile.WriterVersion.TRINO_ORIGINAL,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.TRINO, 6));
assertEquals(OrcFile.WriterVersion.CUDF_ORIGINAL,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.CUDF, 6));
assertEquals(OrcFile.WriterVersion.FUTURE,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0));

Expand All @@ -3633,6 +3637,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterVersion.PRESTO_ORIGINAL));
assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
OrcFile.WriterVersion.TRINO_ORIGINAL));
assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
OrcFile.WriterVersion.CUDF_ORIGINAL));
}

@ParameterizedTest
Expand Down
4 changes: 4 additions & 0 deletions proto/orc_proto.proto
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ message Footer {
// 2 = Presto
// 3 = Scritchley Go from https://github.com/scritchley/orc
// 4 = Trino
// 5 = CUDF
optional uint32 writer = 9;

// information about the encryption in this file
Expand Down Expand Up @@ -432,6 +433,9 @@ message PostScript {
// Version of the Trino writer:
// 6 = original
//
// Version of the CUDF writer:
// 6 = original
//
optional uint32 writerVersion = 6;

// the number of bytes in the encrypted stripe statistics
Expand Down
1 change: 1 addition & 0 deletions site/specification/ORCv1.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ message Footer {
// 2 = Presto
// 3 = Scritchley Go from https://github.com/scritchley/orc
// 4 = Trino
// 5 = CUDF
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we update ORCv2.md as well?

Copy link
Member Author

@guiyanakuang guiyanakuang Aug 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I've already updated ORCv2.md.

optional uint32 writer = 9;
// information about the encryption in this file
optional Encryption encryption = 10;
Expand Down