From 69975289c2668cd939d70c866eec8377c132cad9 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Sat, 8 Oct 2022 16:43:55 +0800 Subject: [PATCH] ArrowIPCTableWriter writes an empty batch in the case of an empty table. Signed-off-by: Liangcai Li --- java/src/main/native/src/TableJni.cpp | 10 ++++++- .../test/java/ai/rapids/cudf/TableTest.java | 29 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ad280cad5fd..c23c5a3ccb2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -258,7 +258,15 @@ class native_arrow_ipc_writer_handle final { writer = *tmp_writer; initialized = true; } - writer->WriteTable(*arrow_tab, max_chunk); + if (arrow_tab->num_rows() == 0) { + // Arrow C++ IPC writer will not write an empty batch in the case of an + // empty table, so we need to write an empty batch explicitly. + // For more details, please see https://issues.apache.org/jira/browse/ARROW-17912. 
+ auto empty_batch = arrow::RecordBatch::MakeEmpty(arrow_tab->schema()); + writer->WriteRecordBatch(*(*empty_batch)); + } else { + writer->WriteTable(*arrow_tab, max_chunk); + } } void close() { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 194c1094caf..4649a0e3507 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -7937,6 +7937,35 @@ void testArrowIPCWriteToBufferChunked() { } } + @Test + void testArrowIPCWriteEmptyToBufferChunked() { + try (Table emptyTable = new Table.TestBuilder().timestampDayColumn().build(); + MyBufferConsumer consumer = new MyBufferConsumer()) { + ArrowIPCWriterOptions options = ArrowIPCWriterOptions.builder() + .withColumnNames("day") + .build(); + try (TableWriter writer = Table.writeArrowIPCChunked(options, consumer)) { + writer.write(emptyTable); + } + try (StreamedTableReader reader = Table.readArrowIPCChunked(new MyBufferProvider(consumer))) { + boolean done = false; + int count = 0; + while (!done) { + try (Table t = reader.getNextIfAvailable()) { + if (t == null) { + done = true; + } else { + assertTablesAreEqual(emptyTable, t); + count++; + } + } + } + // Expect one empty batch for the empty table. + assertEquals(1, count); + } + } + } + @Test void testORCWriteToBufferChunked() { String[] selectedColumns = WriteUtils.getAllColumns(false);