From 4f21fb14ada7dfd5b93bf2fb29a3f7413d90d4d1 Mon Sep 17 00:00:00 2001 From: Firestarman Date: Tue, 28 Sep 2021 13:21:12 +0800 Subject: [PATCH 1/6] JNI: Nested types support for ORC writer Including list and struct. Signed-off-by: Firestarman --- ...rOptions.java => ColumnWriterOptions.java} | 121 +++++++++--------- .../cudf/CondensedMetadataWriterOptions.java | 113 ++++++++++++++++ .../java/ai/rapids/cudf/ORCWriterOptions.java | 10 +- .../ai/rapids/cudf/ParquetWriterOptions.java | 89 +------------ java/src/main/java/ai/rapids/cudf/Table.java | 38 +++++- java/src/main/native/src/TableJni.cpp | 76 +++++------ .../test/java/ai/rapids/cudf/TableTest.java | 52 ++++++-- 7 files changed, 294 insertions(+), 205 deletions(-) rename java/src/main/java/ai/rapids/cudf/{ParquetColumnWriterOptions.java => ColumnWriterOptions.java} (75%) create mode 100644 java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java diff --git a/java/src/main/java/ai/rapids/cudf/ParquetColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java similarity index 75% rename from java/src/main/java/ai/rapids/cudf/ParquetColumnWriterOptions.java rename to java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 229cb0262d3..0e49636fae6 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -22,38 +22,41 @@ import java.util.List; /** - * Per column settings for writing Parquet files. + * Per column settings for writing Parquet/ORC files. + * + * The native also uses the same "column_in_metadata" for both Parquet and ORC. */ -public class ParquetColumnWriterOptions { +public class ColumnWriterOptions { + // `isTimestampTypeInt96` is ignored in ORC private boolean isTimestampTypeInt96; private int precision; private boolean isNullable; private boolean isMap = false; - private String columName; - private ParquetColumnWriterOptions(AbstractStructBuilder builder) { - this.columName = builder.name; + private String columnName; + private ColumnWriterOptions(AbstractStructBuilder builder) { + this.columnName = builder.name; this.isNullable = builder.isNullable; this.childColumnOptions = - (ParquetColumnWriterOptions[]) builder.children.toArray(new ParquetColumnWriterOptions[0]); + (ColumnWriterOptions[]) builder.children.toArray(new ColumnWriterOptions[0]); } /** * Constructor used for list */ - private ParquetColumnWriterOptions(ListBuilder builder) { + private ColumnWriterOptions(ListBuilder builder) { assert(builder.children.size() == 1) : "Lists can only have one child"; - this.columName = builder.name; + this.columnName = builder.name; this.isNullable = builder.isNullable; // we are adding the child twice even though lists have one child only because the way the cudf // has implemented this it requires two children to be set for the list, but it drops the // first one. 
    // This is something that is lower priority and might be fixed in the future.
     this.childColumnOptions =
-        new ParquetColumnWriterOptions[]{DUMMY_CHILD, builder.children.get(0)};
+        new ColumnWriterOptions[]{DUMMY_CHILD, builder.children.get(0)};
   }
 
-  protected ParquetColumnWriterOptions[] childColumnOptions = {};
+  protected ColumnWriterOptions[] childColumnOptions = {};
 
   protected abstract static class AbstractStructBuilder<T extends AbstractStructBuilder,
-      V extends ParquetColumnWriterOptions> extends NestedBuilder<T, V> {
+      V extends ColumnWriterOptions> extends NestedBuilder<T, V> {
     /**
      * Builder specific to build a Struct meta
      */
@@ -72,10 +75,10 @@ protected AbstractStructBuilder() {
   // https://github.com/rapidsai/cudf/pull/7461/commits/5ce33b40abb87cc7b76b5efeb0a3a0215f9ef6fb
   // but it was reverted later on here:
   // https://github.com/rapidsai/cudf/pull/7461/commits/f248eb7265de995a95f998d46d897fb0ae47f53e
-  static ParquetColumnWriterOptions DUMMY_CHILD = new ParquetColumnWriterOptions("DUMMY");
+  static ColumnWriterOptions DUMMY_CHILD = new ColumnWriterOptions("DUMMY");
 
-  public static abstract class NestedBuilder<T extends NestedBuilder, V extends ParquetColumnWriterOptions> {
-    protected List<ParquetColumnWriterOptions> children = new ArrayList<>();
+  public static abstract class NestedBuilder<T extends NestedBuilder, V extends ColumnWriterOptions> {
+    protected List<ColumnWriterOptions> children = new ArrayList<>();
     protected boolean isNullable = true;
     protected String name = "";
 
@@ -89,34 +92,34 @@ protected NestedBuilder(String name, boolean isNullable) {
 
     protected NestedBuilder() {}
 
-    protected ParquetColumnWriterOptions withColumns(String name, boolean isNullable) {
-      return new ParquetColumnWriterOptions(name, isNullable);
+    protected ColumnWriterOptions withColumns(String name, boolean isNullable) {
+      return new ColumnWriterOptions(name, isNullable);
     }
 
-    protected ParquetColumnWriterOptions withDecimal(String name, int precision,
-                                                     boolean isNullable) {
-      return new ParquetColumnWriterOptions(name, false, precision, isNullable);
+    protected ColumnWriterOptions withDecimal(String name, int precision,
+                                              boolean isNullable) {
+      return new ColumnWriterOptions(name, false, precision, isNullable);
     }
 
-    protected ParquetColumnWriterOptions withTimestamp(String name, boolean isInt96,
-                                                       boolean isNullable) {
-      return new ParquetColumnWriterOptions(name, isInt96, 0, isNullable);
+    protected ColumnWriterOptions withTimestamp(String name, boolean isInt96,
+                                                boolean isNullable) {
+      return new ColumnWriterOptions(name, isInt96, 0, isNullable);
     }
 
     /**
      * Set the list column meta.
      * Lists should have only one child in ColumnVector, but the metadata expects a
      * LIST column to have two children and the first child to be the
-     * {@link ParquetColumnWriterOptions#DUMMY_CHILD}.
+     * {@link ColumnWriterOptions#DUMMY_CHILD}.
     * This is the current behavior in cudf and will change in the future.
     * @return this for chaining.
     */
-    public T withListColumn(ParquetListColumnWriterOptions child) {
+    public T withListColumn(ListColumnWriterOptions child) {
       assert (child.getChildColumnOptions().length == 2) : "Lists can only have two children";
       if (child.getChildColumnOptions()[0] != DUMMY_CHILD) {
         throw new IllegalArgumentException("First child in the list has to be DUMMY_CHILD");
       }
-      if (child.getChildColumnOptions()[1].getColumName().isEmpty()) {
+      if (child.getChildColumnOptions()[1].getColumnName().isEmpty()) {
        throw new IllegalArgumentException("Column name can't be empty");
      }
      children.add(child);
@@ -127,7 +130,7 @@ public T withListColumn(ParquetListColumnWriterOptions child) {
    * Set the map column meta.
    * @return this for chaining.
*/ - public T withMapColumn(ParquetColumnWriterOptions child) { + public T withMapColumn(ColumnWriterOptions child) { children.add(child); return (T) this; } @@ -136,9 +139,9 @@ public T withMapColumn(ParquetColumnWriterOptions child) { * Set a child struct meta data * @return this for chaining. */ - public T withStructColumn(ParquetStructColumnWriterOptions child) { - for (ParquetColumnWriterOptions opt: child.getChildColumnOptions()) { - if (opt.getColumName().isEmpty()) { + public T withStructColumn(StructColumnWriterOptions child) { + for (ColumnWriterOptions opt: child.getChildColumnOptions()) { + if (opt.getColumnName().isEmpty()) { throw new IllegalArgumentException("Column name can't be empty"); } } @@ -230,33 +233,33 @@ public T withNullableTimestampColumn(String name, boolean isInt96) { public abstract V build(); } - public ParquetColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, - int precision, boolean isNullable) { + public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, + int precision, boolean isNullable) { this.isTimestampTypeInt96 = isTimestampTypeInt96; this.precision = precision; this.isNullable = isNullable; - this.columName = columnName; + this.columnName = columnName; } - public ParquetColumnWriterOptions(String columnName, boolean isNullable) { + public ColumnWriterOptions(String columnName, boolean isNullable) { this.isTimestampTypeInt96 = false; this.precision = 0; this.isNullable = isNullable; - this.columName = columnName; + this.columnName = columnName; } - public ParquetColumnWriterOptions(String columnName) { + public ColumnWriterOptions(String columnName) { this(columnName, true); } @FunctionalInterface protected interface ByteArrayProducer { - boolean[] apply(ParquetColumnWriterOptions opt); + boolean[] apply(ColumnWriterOptions opt); } @FunctionalInterface protected interface IntArrayProducer { - int[] apply(ParquetColumnWriterOptions opt); + int[] apply(ColumnWriterOptions opt); } boolean[] getFlatIsTimeTypeInt96() { @@ -272,7 +275,7 @@ protected boolean[] getFlatBooleans(boolean[] ret, ByteArrayProducer producer) { boolean[][] childResults = new boolean[childColumnOptions.length][]; int totalChildrenFlatLength = ret.length; for (int i = 0 ; i < childColumnOptions.length ; i++) { - ParquetColumnWriterOptions opt = childColumnOptions[i]; + ColumnWriterOptions opt = childColumnOptions[i]; childResults[i] = producer.apply(opt); totalChildrenFlatLength += childResults[i].length; } @@ -327,7 +330,7 @@ protected int[] getFlatInts(int[] ret, IntArrayProducer producer) { int[][] childResults = new int[childColumnOptions.length][]; int totalChildrenFlatLength = ret.length; for (int i = 0 ; i < childColumnOptions.length ; i++) { - ParquetColumnWriterOptions opt = childColumnOptions[i]; + ColumnWriterOptions opt = childColumnOptions[i]; childResults[i] = producer.apply(opt); totalChildrenFlatLength += childResults[i].length; } @@ -343,7 +346,7 @@ protected int[] getFlatInts(int[] ret, IntArrayProducer producer) { } String[] getFlatColumnNames() { - String[] ret = {columName}; + String[] ret = {columnName}; if (childColumnOptions.length > 0) { return getFlatColumnNames(ret); } else { @@ -355,7 +358,7 @@ protected String[] getFlatColumnNames(String[] ret) { String[][] childResults = new String[childColumnOptions.length][]; int totalChildrenFlatLength = ret.length; for (int i = 0 ; i < childColumnOptions.length ; i++) { - ParquetColumnWriterOptions opt = childColumnOptions[i]; + ColumnWriterOptions opt = childColumnOptions[i]; 
childResults[i] = opt.getFlatColumnNames(); totalChildrenFlatLength += childResults[i].length; } @@ -377,14 +380,14 @@ protected String[] getFlatColumnNames(String[] ret) { * named 'value'. The caller of this method doesn't need to worry about this as this method will * take care of this without the knowledge of the caller. */ - public static ParquetColumnWriterOptions mapColumn(String name, ParquetColumnWriterOptions key, - ParquetColumnWriterOptions value) { - ParquetStructColumnWriterOptions struct = structBuilder("key_value").build(); + public static ColumnWriterOptions mapColumn(String name, ColumnWriterOptions key, + ColumnWriterOptions value) { + StructColumnWriterOptions struct = structBuilder("key_value").build(); if (key.isNullable) { throw new IllegalArgumentException("key column can not be nullable"); } - struct.childColumnOptions = new ParquetColumnWriterOptions[]{key, value}; - ParquetColumnWriterOptions opt = listBuilder(name) + struct.childColumnOptions = new ColumnWriterOptions[]{key, value}; + ColumnWriterOptions opt = listBuilder(name) .withStructColumn(struct) .build(); opt.isMap = true; @@ -422,8 +425,8 @@ public static StructBuilder structBuilder(String name) { /** * Return if the column can have null values */ - public String getColumName() { - return columName; + public String getColumnName() { + return columnName; } /** @@ -450,39 +453,39 @@ public boolean isTimestampTypeInt96() { /** * Return the child columnOptions for this column */ - public ParquetColumnWriterOptions[] getChildColumnOptions() { + public ColumnWriterOptions[] getChildColumnOptions() { return childColumnOptions; } - public static class ParquetStructColumnWriterOptions extends ParquetColumnWriterOptions { - protected ParquetStructColumnWriterOptions(AbstractStructBuilder builder) { + public static class StructColumnWriterOptions extends ColumnWriterOptions { + protected StructColumnWriterOptions(AbstractStructBuilder builder) { super(builder); } } - public static class ParquetListColumnWriterOptions extends ParquetColumnWriterOptions { - protected ParquetListColumnWriterOptions(ListBuilder builder) { + public static class ListColumnWriterOptions extends ColumnWriterOptions { + protected ListColumnWriterOptions(ListBuilder builder) { super(builder); } } - public static class StructBuilder extends AbstractStructBuilder { + public static class StructBuilder extends AbstractStructBuilder { public StructBuilder(String name, boolean isNullable) { super(name, isNullable); } - public ParquetStructColumnWriterOptions build() { - return new ParquetStructColumnWriterOptions(this); + public StructColumnWriterOptions build() { + return new StructColumnWriterOptions(this); } } - public static class ListBuilder extends NestedBuilder { + public static class ListBuilder extends NestedBuilder { public ListBuilder(String name, boolean isNullable) { super(name, isNullable); } - public ParquetListColumnWriterOptions build() { - return new ParquetListColumnWriterOptions(this); + public ListColumnWriterOptions build() { + return new ListColumnWriterOptions(this); } } } diff --git a/java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java b/java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java new file mode 100644 index 00000000000..8a730848b85 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java @@ -0,0 +1,113 @@ +/* + * + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package ai.rapids.cudf;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class CondensedMetadataWriterOptions extends ColumnWriterOptions.StructColumnWriterOptions {
+  private final CompressionType compressionType;
+  private final Map<String, String> metadata;
+
+  protected CondensedMetadataWriterOptions(Builder builder) {
+    super(builder);
+    this.compressionType = builder.compressionType;
+    this.metadata = builder.metadata;
+  }
+
+  @Override
+  boolean[] getFlatIsTimeTypeInt96() {
+    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsTimeTypeInt96());
+  }
+
+  @Override
+  int[] getFlatPrecision() {
+    return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatPrecision());
+  }
+
+  @Override
+  int[] getFlatNumChildren() {
+    return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatNumChildren());
+  }
+
+  @Override
+  boolean[] getFlatIsNullable() {
+    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsNullable());
+  }
+
+  @Override
+  boolean[] getFlatIsMap() {
+    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsMap());
+  }
+
+  @Override
+  String[] getFlatColumnNames() {
+    return super.getFlatColumnNames(new String[]{});
+  }
+
+  String[] getMetadataKeys() {
+    return metadata.keySet().toArray(new String[metadata.size()]);
+  }
+
+  String[] getMetadataValues() {
+    return metadata.values().toArray(new String[metadata.size()]);
+  }
+
+  public CompressionType getCompressionType() {
+    return compressionType;
+  }
+
+  public Map<String, String> getMetadata() {
+    return metadata;
+  }
+
+  public int getTopLevelChildren() {
+    return childColumnOptions.length;
+  }
+
+  public abstract static class Builder<T extends Builder,
+      V extends CondensedMetadataWriterOptions> extends AbstractStructBuilder<T, V> {
+    final Map<String, String> metadata = new LinkedHashMap<>();
+    CompressionType compressionType = CompressionType.AUTO;
+
+    /**
+     * Add a metadata key and a value
+     */
+    public T withMetadata(String key, String value) {
+      this.metadata.put(key, value);
+      return (T) this;
+    }
+
+    /**
+     * Add a map of metadata keys and values
+     */
+    public T withMetadata(Map<String, String> metadata) {
+      this.metadata.putAll(metadata);
+      return (T) this;
+    }
+
+    /**
+     * Set the compression type to use for writing
+     */
+    public T withCompressionType(CompressionType compression) {
+      this.compressionType = compression;
+      return (T) this;
+    }
+  }
+}
diff --git a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
index 85443c3ae0f..23913925217 100644
--- a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
@@ -18,7 +18,11 @@
 
 package ai.rapids.cudf;
 
-public class ORCWriterOptions extends CompressedMetadataWriterOptions {
+/**
+ * This class represents settings for writing ORC files. It includes metadata information
+ * that will be used by the ORC writer to write the file.
+ */
+public class ORCWriterOptions extends CondensedMetadataWriterOptions {
 
   private ORCWriterOptions(Builder builder) {
     super(builder);
@@ -28,7 +32,9 @@ public static Builder builder() {
     return new Builder();
   }
 
-  public static class Builder extends CMWriterBuilder {
+  public static class Builder extends CondensedMetadataWriterOptions.Builder
+      <Builder, ORCWriterOptions> {
+
     public ORCWriterOptions build() {
       return new ORCWriterOptions(this);
     }
diff --git a/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
index 38f8d8e59a4..9226f834cbc 100644
--- a/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
@@ -18,61 +18,16 @@
 
 package ai.rapids.cudf;
 
-import java.util.LinkedHashMap;
-import java.util.Map;
-
 /**
  * This class represents settings for writing Parquet files. It includes metadata information
  * that will be used by the Parquet writer to write the file
  */
-public final class ParquetWriterOptions extends ParquetColumnWriterOptions.ParquetStructColumnWriterOptions {
-  private final CompressionType compressionType;
-  private final Map<String, String> metadata;
+public final class ParquetWriterOptions extends CondensedMetadataWriterOptions {
   private final StatisticsFrequency statsGranularity;
 
   private ParquetWriterOptions(Builder builder) {
     super(builder);
     this.statsGranularity = builder.statsGranularity;
-    this.compressionType = builder.compressionType;
-    this.metadata = builder.metadata;
-  }
-
-  @Override
-  boolean[] getFlatIsTimeTypeInt96() {
-    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsTimeTypeInt96());
-  }
-
-  @Override
-  int[] getFlatPrecision() {
-    return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatPrecision());
-  }
-
-  @Override
-  int[] getFlatNumChildren() {
-    return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatNumChildren());
-  }
-
-  @Override
-  boolean[] getFlatIsNullable() {
-    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsNullable());
-  }
-
-  @Override
-  boolean[] getFlatIsMap() {
-    return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatIsMap());
-  }
-
-  @Override
-  String[] getFlatColumnNames() {
-    return super.getFlatColumnNames(new String[]{});
-  }
-
-  String[] getMetadataKeys() {
-    return metadata.keySet().toArray(new String[metadata.size()]);
-  }
-
-  String[] getMetadataValues() {
-    return metadata.values().toArray(new String[metadata.size()]);
-  }
 
   public enum StatisticsFrequency {
@@ -100,52 +55,14 @@ public StatisticsFrequency getStatisticsFrequency() {
     return statsGranularity;
   }
 
-  public CompressionType getCompressionType() {
-    return compressionType;
-  }
-
-  public Map<String, String> getMetadata() {
-    return metadata;
-  }
-
-  public int getTopLevelChildren() {
-    return childColumnOptions.length;
-  }
-
-  public static class Builder extends ParquetColumnWriterOptions.AbstractStructBuilder<Builder, ParquetWriterOptions> {
+  public static class Builder extends CondensedMetadataWriterOptions.Builder
+      <Builder, ParquetWriterOptions> {
     private StatisticsFrequency statsGranularity = StatisticsFrequency.ROWGROUP;
-    final Map<String, String> metadata = new LinkedHashMap<>();
-    CompressionType compressionType = CompressionType.AUTO;
 
     public Builder() {
       super();
     }
 
-    /**
-     * Add a metadata key and a value
-     */
-    public Builder withMetadata(String key, String value) {
-      this.metadata.put(key, value);
-      return this;
-    }
-
-    /**
-     * Add a map of metadata keys and values
-     */
-    public Builder withMetadata(Map<String, String> metadata) {
-      this.metadata.putAll(metadata);
-      return this;
-    }
-
-    /**
-     * Set the compression type to use for writing
-     */
-    public Builder withCompressionType(CompressionType compression) {
-      this.compressionType = compression;
-      return this;
-    }
-
     public Builder withStatisticsFrequency(StatisticsFrequency statsGranularity) {
       this.statsGranularity = statsGranularity;
       return this;
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 0af02d1c926..6abb40be361 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -337,35 +337,53 @@ private static native long[] readORC(String[] filterColumnNames,
   /**
    * Setup everything to write ORC formatted data to a file.
    * @param columnNames names that correspond to the table columns
+   * @param numChildren the number of top-level children (columns)
+   * @param flatNumChildren flattened list of children per column
    * @param nullable true if the column can have nulls else false
    * @param metadataKeys Metadata key names to place in the ORC file
    * @param metadataValues Metadata values corresponding to metadataKeys
    * @param compression native compression codec ID
+   * @param precisions precision list containing all the precisions of the decimal types in
+   *                   the columns
+   * @param isMapValues true if a column is a map
    * @param filename local output path
    * @return a handle that is used in later calls to writeORCChunk and writeORCEnd.
    */
  private static native long writeORCFileBegin(String[] columnNames,
+                                               int numChildren,
+                                               int[] flatNumChildren,
                                                boolean[] nullable,
                                                String[] metadataKeys,
                                                String[] metadataValues,
                                                int compression,
+                                               int[] precisions,
+                                               boolean[] isMapValues,
                                                String filename) throws CudfException;
 
   /**
    * Setup everything to write ORC formatted data to a buffer.
    * @param columnNames names that correspond to the table columns
+   * @param numChildren the number of top-level children (columns)
+   * @param flatNumChildren flattened list of children per column
    * @param nullable true if the column can have nulls else false
    * @param metadataKeys Metadata key names to place in the ORC file
    * @param metadataValues Metadata values corresponding to metadataKeys
    * @param compression native compression codec ID
+   * @param precisions precision list containing all the precisions of the decimal types in
+   *                   the columns
+   * @param isMapValues true if a column is a map
    * @param consumer consumer of host buffers produced.
    * @return a handle that is used in later calls to writeORCChunk and writeORCEnd.
*/ private static native long writeORCBufferBegin(String[] columnNames, + int numChildren, + int[] flatNumChildren, boolean[] nullable, String[] metadataKeys, String[] metadataValues, int compression, + int[] precisions, + boolean[] isMapValues, HostBufferConsumer consumer) throws CudfException; /** @@ -1079,21 +1097,29 @@ private static class ORCTableWriter implements TableWriter { HostBufferConsumer consumer; private ORCTableWriter(ORCWriterOptions options, File outputFile) { - this.handle = writeORCFileBegin(options.getColumnNames(), - options.getColumnNullability(), + this.handle = writeORCFileBegin(options.getFlatColumnNames(), + options.getTopLevelChildren(), + options.getFlatNumChildren(), + options.getFlatIsNullable(), options.getMetadataKeys(), options.getMetadataValues(), options.getCompressionType().nativeId, + options.getFlatPrecision(), + options.getFlatIsMap(), outputFile.getAbsolutePath()); this.consumer = null; } private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer) { - this.handle = writeORCBufferBegin(options.getColumnNames(), - options.getColumnNullability(), + this.handle = writeORCBufferBegin(options.getFlatColumnNames(), + options.getTopLevelChildren(), + options.getFlatNumChildren(), + options.getFlatIsNullable(), options.getMetadataKeys(), options.getMetadataValues(), options.getCompressionType().nativeId, + options.getFlatPrecision(), + options.getFlatIsMap(), consumer); this.consumer = consumer; } @@ -1150,7 +1176,7 @@ public void writeORC(File outputFile) { // Need to specify the number of columns but leave all column names undefined String[] names = new String[getNumberOfColumns()]; Arrays.fill(names, ""); - ORCWriterOptions opts = ORCWriterOptions.builder().withColumnNames(names).build(); + ORCWriterOptions opts = ORCWriterOptions.builder().withColumns(true, names).build(); writeORC(opts, outputFile); } @@ -1161,7 +1187,7 @@ public void writeORC(File outputFile) { */ @Deprecated public void writeORC(ORCWriterOptions options, File outputFile) { - assert options.getColumnNames().length == getNumberOfColumns() : "must specify names for all columns"; + assert options.getTopLevelChildren() == getNumberOfColumns() : "must specify names for all columns"; try (TableWriter writer = Table.writeORCChunked(options, outputFile)) { writer.write(this); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ee75112a2ed..3963c31d521 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -677,8 +677,10 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, cudf::io::column_in_metadata child; child.set_name(col_names[read_index]) .set_decimal_precision(precisions[read_index]) - .set_int96_timestamps(is_int96[read_index]) .set_nullability(nullability[read_index]); + if (!is_int96.is_null()) { + child.set_int96_timestamps(is_int96[read_index]); + } if (is_map[read_index]) { child.set_list_column_as_map(); } @@ -696,13 +698,12 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_names, jintArray &j_children, jbooleanArray &j_col_nullability, jobjectArray &j_metadata_keys, jobjectArray &j_metadata_values, - jint j_compression, jint j_stats_freq, jbooleanArray &j_isInt96, - jintArray &j_precisions, jbooleanArray &j_is_map, - cudf::io::table_input_metadata &metadata) { + jbooleanArray &j_is_int96, jintArray &j_precisions, + jbooleanArray &j_is_map, 
cudf::io::table_input_metadata &metadata) { cudf::jni::auto_set_device(env); cudf::jni::native_jstringArray col_names(env, j_col_names); cudf::jni::native_jbooleanArray col_nullability(env, j_col_nullability); - cudf::jni::native_jbooleanArray isInt96(env, j_isInt96); + cudf::jni::native_jbooleanArray is_int96(env, j_is_int96); cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); cudf::jni::native_jstringArray meta_values(env, j_metadata_values); cudf::jni::native_jintArray precisions(env, j_precisions); @@ -719,8 +720,11 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam metadata.column_metadata[write_index] .set_name(cpp_names[read_index]) .set_nullability(col_nullability[read_index]) - .set_int96_timestamps(isInt96[read_index]) .set_decimal_precision(precisions[read_index]); + if (!is_int96.is_null()) { + metadata.column_metadata[write_index] + .set_int96_timestamps(is_int96[read_index]); + } if (is_map[read_index]) { metadata.column_metadata[write_index].set_list_column_as_map(); } @@ -728,7 +732,7 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam if (childs_children > 0) { read_index = set_column_metadata(metadata.column_metadata[write_index], cpp_names, col_nullability, - isInt96, precisions, is_map, children, childs_children, read_index); + is_int96, precisions, is_map, children, childs_children, read_index); } } for (auto i = 0; i < meta_keys.size(); ++i) { @@ -736,29 +740,6 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam } } -cudf::io::table_input_metadata createORCTableInputMetadata(JNIEnv *env, - jobjectArray const &j_col_names, - jbooleanArray const &j_col_nullability, - jobjectArray const &j_metadata_keys, - jobjectArray const &j_metadata_values) { - cudf::jni::native_jstringArray const col_names(env, j_col_names); - cudf::jni::native_jbooleanArray const col_nullability(env, j_col_nullability); - cudf::jni::native_jstringArray const meta_keys(env, j_metadata_keys); - cudf::jni::native_jstringArray const meta_values(env, j_metadata_values); - - std::vector const cpp_names = col_names.as_cpp_vector(); - std::size_t const num_columns = cpp_names.size(); - cudf::io::table_input_metadata metadata; - metadata.column_metadata.resize(cpp_names.size()); - for (std::size_t i = 0; i < num_columns; i++) { - metadata.column_metadata[i].set_name(cpp_names[i]).set_nullability(col_nullability[i]); - } - for (int i = 0; i < meta_keys.size(); ++i) { - metadata.user_data[meta_keys[i].get()] = meta_values[i].get(); - } - return metadata; -} - // Check that window parameters are valid. 
bool valid_window_parameters(native_jintArray const &values, native_jpointerArray const &ops, @@ -1376,7 +1357,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( sink_info sink{data_sink.get()}; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_compression, j_stats_freq, j_isInt96, + j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, metadata); chunked_parquet_writer_options opts = @@ -1410,7 +1391,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::jni; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_compression, j_stats_freq, j_isInt96, + j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, metadata); sink_info sink{output_path.get()}; chunked_parquet_writer_options opts = @@ -1512,9 +1493,9 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC( } JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( - JNIEnv *env, jclass, jobjectArray j_col_names, jbooleanArray j_col_nullability, - jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, - jobject consumer) { + JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, + jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, + jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, jobject consumer) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1523,8 +1504,13 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( try { cudf::jni::auto_set_device(env); using namespace cudf::io; - table_input_metadata metadata = cudf::jni::createORCTableInputMetadata( - env, j_col_names, j_col_nullability, j_metadata_keys, j_metadata_values); + using namespace cudf::jni; + table_input_metadata metadata; + // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. 
+    jbooleanArray j_is_int96 = NULL;
+    createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability,
+                        j_metadata_keys, j_metadata_values, j_is_int96,
+                        j_precisions, j_is_map, metadata);
 
     std::unique_ptr<cudf::jni::jni_writer_data_sink> data_sink(
         new cudf::jni::jni_writer_data_sink(env, consumer));
@@ -1543,9 +1529,9 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin(
 }
 
 JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(
-    JNIEnv *env, jclass, jobjectArray j_col_names, jbooleanArray j_col_nullability,
-    jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression,
-    jstring j_output_path) {
+    JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children,
+    jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values,
+    jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, jstring j_output_path) {
   JNI_NULL_CHECK(env, j_col_names, "null columns", 0);
   JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0);
   JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0);
@@ -1554,10 +1540,14 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(
   try {
     cudf::jni::auto_set_device(env);
     using namespace cudf::io;
+    using namespace cudf::jni;
     cudf::jni::native_jstring output_path(env, j_output_path);
-
-    table_input_metadata metadata = cudf::jni::createORCTableInputMetadata(
-        env, j_col_names, j_col_nullability, j_metadata_keys, j_metadata_values);
+    table_input_metadata metadata;
+    // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue.
+    jbooleanArray j_is_int96 = NULL;
+    createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability,
+                        j_metadata_keys, j_metadata_values, j_is_int96,
+                        j_precisions, j_is_map, metadata);
 
     sink_info sink{output_path.get()};
     chunked_orc_writer_options opts = chunked_orc_writer_options::builder(sink)
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index aa9ef5bf766..a727539c447 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -52,7 +52,7 @@
 import java.util.*;
 import java.util.stream.Collectors;
 
-import static ai.rapids.cudf.ParquetColumnWriterOptions.mapColumn;
+import static ai.rapids.cudf.ColumnWriterOptions.mapColumn;
 import static ai.rapids.cudf.ParquetWriterOptions.listBuilder;
 import static ai.rapids.cudf.ParquetWriterOptions.structBuilder;
 import static ai.rapids.cudf.Table.TestBuilder;
@@ -6757,8 +6757,8 @@ void testParquetWriteToBufferChunkedInt96() {
 void testParquetWriteMap() throws IOException {
   ParquetWriterOptions options = ParquetWriterOptions.builder()
       .withMapColumn(mapColumn("my_map",
-          new ParquetColumnWriterOptions("key0", false),
-          new ParquetColumnWriterOptions("value0"))).build();
+          new ColumnWriterOptions("key0", false),
+          new ColumnWriterOptions("value0"))).build();
   File f = File.createTempFile("test-map", ".parquet");
 
   List<HostColumnVector.StructData> list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b")));
@@ -6974,11 +6974,9 @@ void testArrowIPCWriteToBufferChunked() {
 
   @Test
   void testORCWriteToBufferChunked() {
-    try (Table table0 = getExpectedFileTable();
+    try (Table table0 = getExpectedFileTable(true);
          MyBufferConsumer consumer = new MyBufferConsumer()) {
-      String[] colNames = new String[table0.getNumberOfColumns()];
-      Arrays.fill(colNames, "");
-      ORCWriterOptions opts = ORCWriterOptions.builder().withColumnNames(colNames).build();
+
ORCWriterOptions opts = createORCWriterOptionsWithNested(); try (TableWriter writer = Table.writeORCChunked(opts, consumer)) { writer.write(table0); writer.write(table0); @@ -6991,6 +6989,22 @@ void testORCWriteToBufferChunked() { } } + @Test + void testORCWriteToFileChunked() throws IOException { + File tempFile = File.createTempFile("test", ".orc"); + try (Table table0 = getExpectedFileTable(true)) { + ORCWriterOptions opts = createORCWriterOptionsWithNested(); + try (TableWriter writer = Table.writeORCChunked(opts, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + try (Table table1 = Table.readORC(tempFile.getAbsoluteFile())) { + assertTablesAreEqual(table0, table1); + } + } finally { + tempFile.delete(); + } + } + @Test void testORCWriteToFile() throws IOException { File tempFile = File.createTempFile("test", ".orc"); @@ -7010,7 +7024,7 @@ void testORCWriteToFileWithColNames() throws IOException { final String[] colNames = new String[]{"bool", "int", "byte","long","str","float","double"}; try (Table table0 = getExpectedFileTable()) { ORCWriterOptions options = ORCWriterOptions.builder() - .withColumnNames(colNames) + .withColumns(true, colNames) .withMetadata("somekey", "somevalue") .build(); table0.writeORC(options, tempFile.getAbsoluteFile()); @@ -7030,7 +7044,7 @@ void testORCWriteToFileUncompressed() throws IOException { String[] colNames = new String[table0.getNumberOfColumns()]; Arrays.fill(colNames, ""); ORCWriterOptions opts = ORCWriterOptions.builder() - .withColumnNames(colNames) + .withColumns(true, colNames) .withCompressionType(CompressionType.NONE) .build(); table0.writeORC(opts, tempFileUncompressed.getAbsoluteFile()); @@ -7132,6 +7146,26 @@ void fixedWidthRowsRoundTrip() { // utility methods to reduce typing + private ORCWriterOptions createORCWriterOptionsWithNested() { + // The column metadata should match the table returned from + // 'getExpectedFileTable(true)'. + return ORCWriterOptions.builder() + .withNullableColumns("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6") + .withStructColumn(structBuilder("_c7") + .withNullableColumns("_c7-1") + .withNullableColumns("_c7-2") + .build()) + .withListColumn(listBuilder("_c8") + .withNullableColumns("_c8-1").build()) + .withListColumn(listBuilder("_c9") + .withStructColumn(structBuilder("_c9-1") + .withNullableColumns("_c9-1-1") + .withNullableColumns("_c9-1-2") + .build()) + .build()) + .build(); + } + private StructData struct(Object... values) { return new StructData(values); } From cf2a31746c8bf17852c940ba54b8ccdd2590e8f6 Mon Sep 17 00:00:00 2001 From: Firestarman Date: Wed, 29 Sep 2021 14:35:13 +0800 Subject: [PATCH 2/6] Fix a c++ code style issue. 
Signed-off-by: Firestarman --- java/src/main/native/src/TableJni.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 3963c31d521..d5c715f0766 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -722,8 +722,7 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam .set_nullability(col_nullability[read_index]) .set_decimal_precision(precisions[read_index]); if (!is_int96.is_null()) { - metadata.column_metadata[write_index] - .set_int96_timestamps(is_int96[read_index]); + metadata.column_metadata[write_index].set_int96_timestamps(is_int96[read_index]); } if (is_map[read_index]) { metadata.column_metadata[write_index].set_list_column_as_map(); @@ -1357,8 +1356,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( sink_info sink{data_sink.get()}; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_isInt96, - j_precisions, j_is_map, metadata); + j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, + metadata); chunked_parquet_writer_options opts = chunked_parquet_writer_options::builder(sink) @@ -1391,8 +1390,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::jni; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_isInt96, - j_precisions, j_is_map, metadata); + j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, + metadata); sink_info sink{output_path.get()}; chunked_parquet_writer_options opts = chunked_parquet_writer_options::builder(sink) @@ -1509,8 +1508,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_is_int96, - j_precisions, j_is_map, metadata); + j_metadata_keys, j_metadata_values, j_is_int96, j_precisions, j_is_map, + metadata); std::unique_ptr data_sink( new cudf::jni::jni_writer_data_sink(env, consumer)); @@ -1546,8 +1545,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_is_int96, - j_precisions, j_is_map, metadata); + j_metadata_keys, j_metadata_values, j_is_int96, j_precisions, j_is_map, + metadata); sink_info sink{output_path.get()}; chunked_orc_writer_options opts = chunked_orc_writer_options::builder(sink) From 40a1720bac102c30288169e9fae1e66700317952 Mon Sep 17 00:00:00 2001 From: Firestarman Date: Fri, 8 Oct 2021 11:23:50 +0800 Subject: [PATCH 3/6] Address the new comment. 
Signed-off-by: Firestarman
---
 ...erOptions.java => CompressionMetadataWriterOptions.java} | 6 +++---
 java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java     | 4 ++--
 java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)
 rename java/src/main/java/ai/rapids/cudf/{CondensedMetadataWriterOptions.java => CompressionMetadataWriterOptions.java} (91%)

diff --git a/java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java
similarity index 91%
rename from java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java
rename to java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java
index 8a730848b85..9292975d0ce 100644
--- a/java/src/main/java/ai/rapids/cudf/CondensedMetadataWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java
@@ -21,11 +21,11 @@
 import java.util.LinkedHashMap;
 import java.util.Map;
 
-public class CondensedMetadataWriterOptions extends ColumnWriterOptions.StructColumnWriterOptions {
+public class CompressionMetadataWriterOptions extends ColumnWriterOptions.StructColumnWriterOptions {
   private final CompressionType compressionType;
   private final Map<String, String> metadata;
 
-  protected CondensedMetadataWriterOptions(Builder builder) {
+  protected CompressionMetadataWriterOptions(Builder builder) {
     super(builder);
     this.compressionType = builder.compressionType;
     this.metadata = builder.metadata;
@@ -82,7 +82,7 @@ public int getTopLevelChildren() {
   }
 
   public abstract static class Builder<T extends Builder,
-      V extends CondensedMetadataWriterOptions> extends AbstractStructBuilder<T, V> {
+      V extends CompressionMetadataWriterOptions> extends AbstractStructBuilder<T, V> {
     final Map<String, String> metadata = new LinkedHashMap<>();
     CompressionType compressionType = CompressionType.AUTO;
 
diff --git a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
index 23913925217..372f919532e 100644
--- a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
@@ -22,7 +22,7 @@
  * This class represents settings for writing ORC files. It includes metadata information
  * that will be used by the ORC writer to write the file.
  */
-public class ORCWriterOptions extends CondensedMetadataWriterOptions {
+public class ORCWriterOptions extends CompressionMetadataWriterOptions {
 
   private ORCWriterOptions(Builder builder) {
     super(builder);
@@ -32,7 +32,7 @@ public static Builder builder() {
     return new Builder();
   }
 
-  public static class Builder extends CondensedMetadataWriterOptions.Builder
+  public static class Builder extends CompressionMetadataWriterOptions.Builder
       <Builder, ORCWriterOptions> {
 
     public ORCWriterOptions build() {
       return new ORCWriterOptions(this);
     }
diff --git a/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
index 9226f834cbc..7b58817550d 100644
--- a/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java
@@ -22,7 +22,7 @@
 
 /**
  * This class represents settings for writing Parquet files. It includes metadata information
 * that will be used by the Parquet writer to write the file
 */
-public final class ParquetWriterOptions extends CondensedMetadataWriterOptions {
+public final class ParquetWriterOptions extends CompressionMetadataWriterOptions {
   private final StatisticsFrequency statsGranularity;
 
   private ParquetWriterOptions(Builder builder) {
@@ -55,7 +55,7 @@ public StatisticsFrequency getStatisticsFrequency() {
     return statsGranularity;
   }
 
-  public static class Builder extends CondensedMetadataWriterOptions.Builder
+  public static class Builder extends CompressionMetadataWriterOptions.Builder
       <Builder, ParquetWriterOptions> {
     private StatisticsFrequency statsGranularity = StatisticsFrequency.ROWGROUP;
 
From 600b89e5ff38af2092a1b6c9e65ce80177372c30 Mon Sep 17 00:00:00 2001
From: Firestarman
Date: Sat, 9 Oct 2021 14:20:14 +0800
Subject: [PATCH 4/6] Add test for map write

Signed-off-by: Firestarman
---
 .../test/java/ai/rapids/cudf/TableTest.java | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 901ac4b242e..ce3005f41ee 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -7015,6 +7015,34 @@ void testORCWriteToFileChunked() throws IOException {
   }
 
+  @Test
+  void testORCWriteMapChunked() throws IOException {
+    ORCWriterOptions options = ORCWriterOptions.builder()
+        .withMapColumn(mapColumn("my_map",
+            new ColumnWriterOptions("key0", false),
+            new ColumnWriterOptions("value0"))).build();
+    File f = File.createTempFile("test-map", ".orc");
+    List<HostColumnVector.StructData> list1 =
+        Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b")));
+    List<HostColumnVector.StructData> list2 =
+        Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "c")));
+    List<HostColumnVector.StructData> list3 =
+        Arrays.asList(new HostColumnVector.StructData(Arrays.asList("e", "d")));
+    HostColumnVector.StructType structType = new HostColumnVector.StructType(true,
+        Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING),
+            new HostColumnVector.BasicType(true, DType.STRING)));
+    try (ColumnVector listColumn = ColumnVector.fromLists(new HostColumnVector.ListType(true,
+        structType), list1, list2, list3);
+         Table t0 = new Table(listColumn)) {
+      try (TableWriter writer = Table.writeORCChunked(options, f)) {
+        writer.write(t0);
+      }
+      try (Table res = Table.readORC(f)) {
+        assertTablesAreEqual(t0, res);
+      }
+    }
+  }
+
 @Test
 void testORCWriteToFile() throws IOException {
   File tempFile = File.createTempFile("test", ".orc");
From f622514cf6caae6a62e57df432f1e9a7b27080e0 Mon Sep 17 00:00:00 2001
From: Firestarman
Date: Mon, 11 Oct 2021 18:29:00 +0800
Subject: [PATCH 5/6] JNI data writer sink supports device_write_async.

Now call the sync version directly.

Signed-off-by: Firestarman
---
 java/src/main/native/src/TableJni.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index d5c715f0766..218b03c66bc 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -143,6 +143,13 @@ class jni_writer_data_sink final : public cudf::io::data_sink {
     stream.synchronize();
   }
 
+  std::future<void> device_write_async(void const *gpu_data, size_t size,
+                                       rmm::cuda_stream_view stream) override {
+    // Call the sync version until figuring out how to write asynchronously.
+ device_write(gpu_data, size, stream); + return std::async(std::launch::deferred, []{}); + } + void flush() override { if (current_buffer_written > 0) { JNIEnv *env = cudf::jni::get_jni_env(jvm); From 6586fac868d29a9f0c4c7be1e389136b4971b376 Mon Sep 17 00:00:00 2001 From: Firestarman Date: Tue, 12 Oct 2021 08:53:05 +0800 Subject: [PATCH 6/6] Fix a code format issue Signed-off-by: Firestarman --- java/src/main/native/src/TableJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 218b03c66bc..c66cf13a5ae 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -147,7 +147,7 @@ class jni_writer_data_sink final : public cudf::io::data_sink { rmm::cuda_stream_view stream) override { // Call the sync version until figuring out how to write asynchronously. device_write(gpu_data, size, stream); - return std::async(std::launch::deferred, []{}); + return std::async(std::launch::deferred, [] {}); } void flush() override {
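
For reference, the nested ORC writer API added by this series composes as in the sketch
below. This is illustrative only and not code from the patches: it assumes static imports
of ColumnWriterOptions.mapColumn, ColumnWriterOptions.listBuilder, and
ColumnWriterOptions.structBuilder, and the column names, output path, and 'table'
variable are hypothetical.

    // Describe a table with a flat column, a struct, a list, and a map, then
    // write it to ORC. The metadata must match the Table's column layout.
    ORCWriterOptions options = ORCWriterOptions.builder()
        .withColumns(true, "id")                       // flat, nullable column
        .withStructColumn(structBuilder("point")
            .withNullableColumns("x")
            .withNullableColumns("y")
            .build())
        .withListColumn(listBuilder("scores")
            .withNullableColumns("score")
            .build())
        .withMapColumn(mapColumn("attrs",
            new ColumnWriterOptions("key", false),     // map keys can not be nullable
            new ColumnWriterOptions("value")))
        .withMetadata("somekey", "somevalue")
        .withCompressionType(CompressionType.NONE)
        .build();
    try (TableWriter writer = Table.writeORCChunked(options, new File("/tmp/example.orc"))) {
      writer.write(table);
    }

As on the Parquet path, mapColumn throws IllegalArgumentException for a nullable key
column, and the list builder inserts the internal DUMMY_CHILD so callers never need to
construct it themselves.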