diff --git a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java index 68213c21956..2b1afb92e36 100644 --- a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java +++ b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java @@ -80,25 +80,22 @@ static DType implicitConversion(BinaryOp op, BinaryOperable lhs, BinaryOperable return DType.BOOL8; } if (a.isDecimalType() && b.isDecimalType()) { - // Here scale is created with value 0 as `scale` is required to create DType of - // decimal type. Dtype is discarded for binary operations for decimal types in cudf as a new - // DType is created for output type with new scale. New scale for output depends upon operator. - int scale = 0; - if (a.typeId == DType.DTypeEnum.DECIMAL32) { - if (b.typeId == DType.DTypeEnum.DECIMAL32) { - return DType.create(DType.DTypeEnum.DECIMAL32, - ColumnView.getFixedPointOutputScale(op, lhs.getType(), rhs.getType())); - } else { - throw new IllegalArgumentException("Both columns must be of the same fixed_point type"); - } - } else if (a.typeId == DType.DTypeEnum.DECIMAL64) { - if (b.typeId == DType.DTypeEnum.DECIMAL64) { - return DType.create(DType.DTypeEnum.DECIMAL64, - ColumnView.getFixedPointOutputScale(op, lhs.getType(), rhs.getType())); - } else { - throw new IllegalArgumentException("Both columns must be of the same fixed_point type"); - } + if (a.typeId != b.typeId) { + throw new IllegalArgumentException("Both columns must be of the same fixed_point type"); } + final int scale = ColumnView.getFixedPointOutputScale(op, lhs.getType(), rhs.getType()); + // The output precision/size should be at least as large as the input. + // It may be larger if room is needed for it based off of the output scale. 
+ final DType.DTypeEnum outputEnum; + if (scale <= DType.DECIMAL32_MAX_PRECISION && a.typeId == DType.DTypeEnum.DECIMAL32) { + outputEnum = DType.DTypeEnum.DECIMAL32; + } else if (scale <= DType.DECIMAL64_MAX_PRECISION && + (a.typeId == DType.DTypeEnum.DECIMAL32 || a.typeId == DType.DTypeEnum.DECIMAL64)) { + outputEnum = DType.DTypeEnum.DECIMAL64; + } else { + outputEnum = DType.DTypeEnum.DECIMAL128; + } + return DType.create(outputEnum, scale); } throw new IllegalArgumentException("Unsupported types " + a + " and " + b); } diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 7eb51a52a7d..3fed6316215 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -1391,6 +1392,17 @@ public static ColumnVector decimalFromDoubles(DType type, RoundingMode mode, dou } } + + /** + * Create a new decimal vector on the device from unscaled BigInteger values. + * The scale here is the negation of the scale used by {@link java.math.BigDecimal}. + */ + public static ColumnVector decimalFromBigInt(int scale, BigInteger... values) { + try (HostColumnVector host = HostColumnVector.decimalFromBigIntegers(scale, values)) { + return host.copyToDevice(); + } + } + /** * Create a new string vector from the given values. This API * supports inline nulls. 
This is really intended to be used only for testing as diff --git a/java/src/main/java/ai/rapids/cudf/DType.java b/java/src/main/java/ai/rapids/cudf/DType.java index 2d851aa2ae3..742501be375 100644 --- a/java/src/main/java/ai/rapids/cudf/DType.java +++ b/java/src/main/java/ai/rapids/cudf/DType.java @@ -23,6 +23,7 @@ public final class DType { public static final int DECIMAL32_MAX_PRECISION = 9; public static final int DECIMAL64_MAX_PRECISION = 18; + public static final int DECIMAL128_MAX_PRECISION = 38; /* enum representing various types. Whenever a new non-decimal type is added please make sure below sections are updated as well: @@ -77,7 +78,8 @@ public enum DTypeEnum { LIST(0, 24), DECIMAL32(4, 25), DECIMAL64(8, 26), - STRUCT(0, 27); + DECIMAL128(16, 27), + STRUCT(0, 28); final int sizeInBytes; final int nativeId; @@ -167,6 +169,7 @@ private DType(DTypeEnum id, int decimalScale) { LIST, null, // DECIMAL32 null, // DECIMAL64 + null, // DECIMAL128 STRUCT }; @@ -276,6 +279,13 @@ public static DType fromNative(int nativeId, int scale) { } return new DType(DTypeEnum.DECIMAL64, scale); } + if (nativeId == DTypeEnum.DECIMAL128.nativeId) { + if (-scale > DECIMAL128_MAX_PRECISION) { + throw new IllegalArgumentException( + "Scale " + (-scale) + " exceeds DECIMAL128_MAX_PRECISION " + DECIMAL128_MAX_PRECISION); + } + return new DType(DTypeEnum.DECIMAL128, scale); + } } throw new IllegalArgumentException("Could not translate " + nativeId + " into a DType"); } @@ -293,6 +303,8 @@ public static DType fromJavaBigDecimal(BigDecimal dec) { return new DType(DTypeEnum.DECIMAL32, -dec.scale()); } else if (dec.precision() <= DECIMAL64_MAX_PRECISION) { return new DType(DTypeEnum.DECIMAL64, -dec.scale()); + } else if (dec.precision() <= DECIMAL128_MAX_PRECISION) { + return new DType(DTypeEnum.DECIMAL128, -dec.scale()); } throw new IllegalArgumentException("Precision " + dec.precision() + - " exceeds max precision cuDF can support " + DECIMAL64_MAX_PRECISION); + " exceeds max precision cuDF can support " + DECIMAL128_MAX_PRECISION); @@ -450,7 +462,8 @@ public 
boolean hasOffsets() { private static final EnumSet DECIMALS = EnumSet.of( DTypeEnum.DECIMAL32, - DTypeEnum.DECIMAL64 + DTypeEnum.DECIMAL64, + DTypeEnum.DECIMAL128 ); private static final EnumSet NESTED_TYPE = EnumSet.of( diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java index 46255428c1c..e21a4ac81c6 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java @@ -525,6 +525,23 @@ public static HostColumnVector decimalFromBoxedLongs(int scale, Long... values) }); } + /** + * Create a new decimal vector from unscaled BigInteger values and a scale; null entries are appended as nulls. + * The created vector is of type DType.DECIMAL128. + * The scale here is the negation of the scale used by {@link java.math.BigDecimal}. + */ + public static HostColumnVector decimalFromBigIntegers(int scale, BigInteger... values) { + return build(DType.create(DType.DTypeEnum.DECIMAL128, scale), values.length, (b) -> { + for (BigInteger v : values) { + if (v == null) { + b.appendNull(); + } else { + b.appendUnscaledDecimal(v); + } + } + }); + } + /** * Create a new decimal vector from double floats with specific DecimalType and RoundingMode. * All doubles will be rescaled if necessary, according to scale of input DecimalType and RoundingMode. 
@@ -1222,7 +1239,13 @@ public final ColumnBuilder append(BigDecimal value) { data.setInt(currentIndex * type.getSizeInBytes(), unscaledVal.intValueExact()); } else if (type.typeId == DType.DTypeEnum.DECIMAL64) { data.setLong(currentIndex * type.getSizeInBytes(), unscaledVal.longValueExact()); - } else { + } else if (type.typeId == DType.DTypeEnum.DECIMAL128) { + assert currentIndex < rows; + // NOTE(review): the DECIMAL32/DECIMAL64 branches write unscaledVal while this one re-reads value.unscaledValue(); confirm both carry the column's scale. + byte[] unscaledValueBytes = value.unscaledValue().toByteArray(); + byte[] result = convertDecimal128FromJavaToCudf(unscaledValueBytes); + data.setBytes(currentIndex*DType.DTypeEnum.DECIMAL128.sizeInBytes, result, 0, result.length); + } else { throw new IllegalStateException(type + " is not a supported decimal type."); } currentIndex++; @@ -1450,7 +1473,7 @@ public final Builder append(BigDecimal value) { */ public final Builder append(BigDecimal value, RoundingMode roundingMode) { assert type.isDecimalType(); - assert currentIndex < rows; + assert currentIndex < rows: "appended too many values " + currentIndex + " out of total rows " + rows; BigInteger unscaledValue = value.setScale(-type.getScale(), roundingMode).unscaledValue(); if (type.typeId == DType.DTypeEnum.DECIMAL32) { assert value.precision() <= DType.DECIMAL32_MAX_PRECISION : "value exceeds maximum precision for DECIMAL32"; @@ -1458,6 +1481,11 @@ public final Builder append(BigDecimal value, RoundingMode roundingMode) { } else if (type.typeId == DType.DTypeEnum.DECIMAL64) { assert value.precision() <= DType.DECIMAL64_MAX_PRECISION : "value exceeds maximum precision for DECIMAL64 "; data.setLong(currentIndex * type.getSizeInBytes(), unscaledValue.longValueExact()); + } else if (type.typeId == DType.DTypeEnum.DECIMAL128) { + assert value.precision() <= DType.DECIMAL128_MAX_PRECISION : "value exceeds maximum precision for DECIMAL128 "; + // Store the value rescaled to the column's scale, like the DECIMAL32/DECIMAL64 branches; appendUnscaledDecimal advances currentIndex itself, hence the early return. + appendUnscaledDecimal(unscaledValue); + return this; } else { throw new IllegalStateException(type + " is not a supported decimal type."); } @@ -1481,6 +1509,16 @@ public final 
Builder appendUnscaledDecimal(long value) { return this; } + public final Builder appendUnscaledDecimal(BigInteger value) { + assert type.typeId == DType.DTypeEnum.DECIMAL128; + assert currentIndex < rows; + byte[] unscaledValueBytes = value.toByteArray(); + byte[] result = convertDecimal128FromJavaToCudf(unscaledValueBytes); + data.setBytes(currentIndex*DType.DTypeEnum.DECIMAL128.sizeInBytes, result, 0, result.length); + currentIndex++; + return this; + } + public Builder append(String value) { assert value != null : "appendNull must be used to append null strings"; return appendUTF8String(value.getBytes(StandardCharsets.UTF_8)); diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java index e4fb71033af..dd07df16553 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java @@ -22,6 +22,8 @@ import org.slf4j.LoggerFactory; import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -341,6 +343,13 @@ public final BigDecimal getBigDecimal(long index) { } else if (type.typeId == DType.DTypeEnum.DECIMAL64) { long unscaledValue = offHeap.data.getLong(index * type.getSizeInBytes()); return BigDecimal.valueOf(unscaledValue, -type.getScale()); + } else if (type.typeId == DType.DTypeEnum.DECIMAL128) { + int sizeInBytes = DType.DTypeEnum.DECIMAL128.sizeInBytes; + byte[] dst = new byte[sizeInBytes]; + // We need to switch the endianness for decimal128 byte arrays between java and native code. 
+ offHeap.data.getBytes(dst, 0, (index * sizeInBytes), sizeInBytes); + convertInPlaceToBigEndian(dst); + return new BigDecimal(new BigInteger(dst), -type.getScale()); } else { throw new IllegalStateException(type + " is not a supported decimal type."); } @@ -534,6 +543,42 @@ public String toString() { '}'; } + /** + * Converts big-endian two's complement bytes, as produced by BigInteger.toByteArray(), into the + * 16-byte little-endian layout used for DECIMAL128, sign-extending the unused high-order bytes. + * @throws IllegalArgumentException if the value needs more bytes than DECIMAL128 holds + */ + protected static byte[] convertDecimal128FromJavaToCudf(byte[] bytes) { + byte[] finalBytes = new byte[DType.DTypeEnum.DECIMAL128.sizeInBytes]; + if (bytes.length > finalBytes.length) { + throw new IllegalArgumentException("BigInteger needs " + bytes.length + " bytes, more than DECIMAL128 can hold (" + finalBytes.length + ")"); + } + byte lastByte = bytes[0]; + //Convert to 2's complement representation and make sure the sign bit is extended correctly + byte setByte = (lastByte & 0x80) > 0 ? (byte)0xff : (byte)0x00; + for(int i = bytes.length; i < finalBytes.length; i++) { + finalBytes[i] = setByte; + } + // After setting the sign bits, reverse the rest of the bytes for endianness + for(int k = 0; k < bytes.length; k++) { + finalBytes[k] = bytes[bytes.length - k - 1]; + } + return finalBytes; + } + + private void convertInPlaceToBigEndian(byte[] dst) { + assert ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + int i =0; + int j = dst.length -1; + while (j > i) { + byte tmp; + tmp = dst[j]; + dst[j] = dst[i]; + dst[i] = tmp; + j--; + i++; + } + } ///////////////////////////////////////////////////////////////////////////// // HELPER CLASSES ///////////////////////////////////////////////////////////////////////////// @@ -557,15 +602,9 @@ protected synchronized boolean cleanImpl(boolean logErrorIfNotClean) { boolean neededCleanup = false; if (data != null || valid != null || offsets != null) { try { - if (data != null) { - data.close(); - } - if (offsets != null) { - offsets.close(); - } - if (valid != null) { - valid.close(); - } + ColumnVector.closeBuffers(data); + ColumnVector.closeBuffers(offsets); + ColumnVector.closeBuffers(valid); } finally { // Always mark the resource as freed even if an exception is thrown. 
// We cannot know how far it progressed before the exception, and diff --git a/java/src/main/java/ai/rapids/cudf/ORCOptions.java b/java/src/main/java/ai/rapids/cudf/ORCOptions.java index 359a6b96628..2ff253060f0 100644 --- a/java/src/main/java/ai/rapids/cudf/ORCOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ORCOptions.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,10 @@ package ai.rapids.cudf; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + /** * Options for reading a ORC file */ @@ -27,9 +31,11 @@ public class ORCOptions extends ColumnFilterOptions { private final boolean useNumPyTypes; private final DType unit; + private final String[] decimal128Columns; private ORCOptions(Builder builder) { super(builder); + decimal128Columns = builder.decimal128Columns.toArray(new String[0]); useNumPyTypes = builder.useNumPyTypes; unit = builder.unit; } @@ -42,6 +48,10 @@ DType timeUnit() { return unit; } + String[] getDecimal128Columns() { + return decimal128Columns; + } + public static Builder builder() { return new Builder(); } @@ -50,6 +60,8 @@ public static class Builder extends ColumnFilterOptions.Builder { private boolean useNumPyTypes = true; private DType unit = DType.EMPTY; + final List decimal128Columns = new ArrayList<>(); + /** * Specify whether the parser should implicitly promote TIMESTAMP_DAYS * columns to TIMESTAMP_MILLISECONDS for compatibility with NumPy. @@ -73,6 +85,23 @@ public ORCOptions.Builder withTimeUnit(DType unit) { return this; } + /** + * Specify decimal columns which shall be read as DECIMAL128. Otherwise, decimal columns + * will be read as DECIMAL64 by default in ORC. 
+ * + * In terms of child columns of nested types, their parents need to be prepended as prefix + * of the column name, in case of ambiguity. For struct columns, the names of child columns + * are formatted as `{struct_col_name}.{child_col_name}`. For list columns, the data(child) + * columns are named as `{list_col_name}.1`. + * + * @param names names of columns which read as DECIMAL128 + * @return builder for chaining + */ + public Builder decimal128Column(String... names) { + decimal128Columns.addAll(Arrays.asList(names)); + return this; + } + public ORCOptions build() { return new ORCOptions(this); } } } diff --git a/java/src/main/java/ai/rapids/cudf/Scalar.java b/java/src/main/java/ai/rapids/cudf/Scalar.java index 631f091005a..03e77573695 100644 --- a/java/src/main/java/ai/rapids/cudf/Scalar.java +++ b/java/src/main/java/ai/rapids/cudf/Scalar.java @@ -22,6 +22,8 @@ import org.slf4j.LoggerFactory; import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; @@ -86,6 +88,8 @@ public static Scalar fromNull(DType type) { return new Scalar(type, makeDecimal32Scalar(0, type.getScale(), false)); case DECIMAL64: return new Scalar(type, makeDecimal64Scalar(0L, type.getScale(), false)); + case DECIMAL128: + return new Scalar(type, makeDecimal128Scalar(convertDecimal128FromJavaToCudf(BigInteger.ZERO.toByteArray()), type.getScale(), false)); case LIST: throw new IllegalArgumentException("Please call 'listFromNull' to create a null list scalar."); default: @@ -227,6 +231,13 @@ public static Scalar fromDecimal(int scale, long unscaledValue) { return new Scalar(DType.create(DType.DTypeEnum.DECIMAL64, scale), handle); } + public static Scalar fromDecimal(int scale, BigInteger unscaledValue) { + byte[] unscaledValueBytes = unscaledValue.toByteArray(); + byte[] finalBytes = convertDecimal128FromJavaToCudf(unscaledValueBytes); + long handle = makeDecimal128Scalar(finalBytes, scale, true); + return 
new Scalar(DType.create(DType.DTypeEnum.DECIMAL128, scale), handle); + } + public static Scalar fromFloat(Float value) { if (value == null) { return Scalar.fromNull(DType.FLOAT32); @@ -253,8 +264,12 @@ public static Scalar fromDecimal(BigDecimal value) { long handle; if (dt.typeId == DType.DTypeEnum.DECIMAL32) { handle = makeDecimal32Scalar(value.unscaledValue().intValueExact(), -value.scale(), true); - } else { + } else if (dt.typeId == DType.DTypeEnum.DECIMAL64) { handle = makeDecimal64Scalar(value.unscaledValue().longValueExact(), -value.scale(), true); + } else { + byte[] unscaledValueBytes = value.unscaledValue().toByteArray(); + byte[] finalBytes = convertDecimal128FromJavaToCudf(unscaledValueBytes); + handle = makeDecimal128Scalar(finalBytes, -value.scale(), true); } return new Scalar(dt, handle); } @@ -470,6 +485,7 @@ private static ColumnVector buildNullColumnVector(HostColumnVector.DataType host private static native short getShort(long scalarHandle); private static native int getInt(long scalarHandle); private static native long getLong(long scalarHandle); + private static native byte[] getBigIntegerBytes(long scalarHandle); private static native float getFloat(long scalarHandle); private static native double getDouble(long scalarHandle); private static native byte[] getUTF8(long scalarHandle); @@ -493,6 +509,7 @@ private static ColumnVector buildNullColumnVector(HostColumnVector.DataType host private static native long makeTimestampTimeScalar(int dtypeNativeId, long value, boolean isValid); private static native long makeDecimal32Scalar(int value, int scale, boolean isValid); private static native long makeDecimal64Scalar(long value, int scale, boolean isValid); + private static native long makeDecimal128Scalar(byte[] value, int scale, boolean isValid); private static native long makeListScalar(long viewHandle, boolean isValid); private static native long makeStructScalar(long[] viewHandles, boolean isValid); private static native long repeatString(long 
scalarHandle, int repeatTimes); @@ -579,6 +596,15 @@ public long getLong() { return getLong(getScalarHandle()); } + /** + * Returns the unscaled value of a DECIMAL128 scalar as big-endian two's complement bytes, the layout accepted by {@link java.math.BigInteger#BigInteger(byte[])}. + */ + public byte[] getBigInteger() { + byte[] res = getBigIntegerBytes(getScalarHandle()); + convertInPlaceToBigEndian(res); + return res; + } + /** * Returns the scalar value as a float. */ @@ -601,6 +627,8 @@ public BigDecimal getBigDecimal() { return BigDecimal.valueOf(getInt(), -type.getScale()); } else if (this.type.typeId == DType.DTypeEnum.DECIMAL64) { return BigDecimal.valueOf(getLong(), -type.getScale()); + } else if (this.type.typeId == DType.DTypeEnum.DECIMAL128) { + return new BigDecimal(new BigInteger(getBigInteger()), -type.getScale()); } throw new IllegalArgumentException("Couldn't getBigDecimal from nonDecimal scalar"); } @@ -844,6 +872,8 @@ public String toString() { case DECIMAL32: // FALL THROUGH case DECIMAL64: + // FALL THROUGH + case DECIMAL128: sb.append(getBigDecimal()); break; case LIST: @@ -879,6 +909,38 @@ public Scalar repeatString(int repeatTimes) { return new Scalar(DType.STRING, repeatString(getScalarHandle(), repeatTimes)); } + private static byte[] convertDecimal128FromJavaToCudf(byte[] bytes) { + byte[] finalBytes = new byte[DType.DTypeEnum.DECIMAL128.sizeInBytes]; + if (bytes.length > finalBytes.length) { + throw new IllegalArgumentException("BigInteger needs " + bytes.length + " bytes, more than DECIMAL128 can hold (" + finalBytes.length + ")"); + } + byte lastByte = bytes[0]; + //Convert to 2's complement representation and make sure the sign bit is extended correctly + byte setByte = (lastByte & 0x80) > 0 ? 
(byte)0xff : (byte)0x00; + for(int i = bytes.length; i < finalBytes.length; i++) { + finalBytes[i] = setByte; + } + // After setting the sign bits, reverse the rest of the bytes for endianness + for(int k = 0; k < bytes.length; k++) { + finalBytes[k] = bytes[bytes.length - k - 1]; + } + return finalBytes; + } + + private void convertInPlaceToBigEndian(byte[] res) { + assert ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + int i =0; + int j = res.length -1; + while (j > i) { + byte tmp; + tmp = res[j]; + res[j] = res[i]; + res[i] = tmp; + j--; + i++; + } + } + /** * Holds the off-heap state of the scalar so it can be cleaned up, even if it is leaked. */ diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 68e7a21988a..b0791fb440f 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -27,6 +27,8 @@ import java.io.File; import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.MathContext; import java.math.RoundingMode; import java.nio.ByteBuffer; import java.util.*; @@ -329,10 +331,12 @@ private static native long writeParquetBufferBegin(String[] columnNames, * @param usingNumPyTypes whether the parser should implicitly promote TIMESTAMP * columns to TIMESTAMP_MILLISECONDS for compatibility with NumPy. * @param timeUnit return type of TimeStamp in units + * @param decimal128Columns name of the columns which are read as Decimal128 rather than Decimal64 */ private static native long[] readORC(String[] filterColumnNames, String filePath, long address, long length, - boolean usingNumPyTypes, int timeUnit) throws CudfException; + boolean usingNumPyTypes, int timeUnit, + String[] decimal128Columns) throws CudfException; /** * Setup everything to write ORC formatted data to a file. 
@@ -881,7 +885,9 @@ public static Table readORC(File path) { */ public static Table readORC(ORCOptions opts, File path) { return new Table(readORC(opts.getIncludeColumnNames(), - path.getAbsolutePath(), 0, 0, opts.usingNumPyTypes(), opts.timeUnit().typeId.getNativeId())); + path.getAbsolutePath(), 0, 0, + opts.usingNumPyTypes(), opts.timeUnit().typeId.getNativeId(), + opts.getDecimal128Columns())); } /** @@ -941,8 +947,9 @@ public static Table readORC(ORCOptions opts, HostMemoryBuffer buffer, assert len <= buffer.getLength() - offset; assert offset >= 0 && offset < buffer.length; return new Table(readORC(opts.getIncludeColumnNames(), - null, buffer.getAddress() + offset, len, opts.usingNumPyTypes(), - opts.timeUnit().typeId.getNativeId())); + null, buffer.getAddress() + offset, len, + opts.usingNumPyTypes(), opts.timeUnit().typeId.getNativeId(), + opts.getDecimal128Columns())); } private static class ParquetTableWriter implements TableWriter { @@ -3808,6 +3815,16 @@ public TestBuilder decimal64Column(int scale, RoundingMode mode, String... value return this; } + public TestBuilder decimal128Column(int scale, RoundingMode mode, BigInteger... 
values) { + types.add(new BasicType(true, DType.create(DType.DTypeEnum.DECIMAL128, scale))); + BigDecimal[] data = Arrays.stream(values).map((x) -> { + if (x == null) return null; + return new BigDecimal(x, scale, new MathContext(38, mode)); + }).toArray(BigDecimal[]::new); + typeErasedData.add(data); + return this; + } + private static ColumnVector from(DType type, Object dataArray) { ColumnVector ret = null; switch (type.typeId) { @@ -3852,6 +3869,7 @@ private static ColumnVector from(DType type, Object dataArray) { break; case DECIMAL32: case DECIMAL64: + case DECIMAL128: int scale = type.getScale(); if (dataArray instanceof Integer[]) { BigDecimal[] data = Arrays.stream(((Integer[]) dataArray)) diff --git a/java/src/main/native/src/ScalarJni.cpp b/java/src/main/native/src/ScalarJni.cpp index fb4f14fdb80..b00b066742a 100644 --- a/java/src/main/native/src/ScalarJni.cpp +++ b/java/src/main/native/src/ScalarJni.cpp @@ -109,6 +109,20 @@ JNIEXPORT jdouble JNICALL Java_ai_rapids_cudf_Scalar_getDouble(JNIEnv *env, jcla CATCH_STD(env, 0); } +JNIEXPORT jbyteArray JNICALL Java_ai_rapids_cudf_Scalar_getBigIntegerBytes(JNIEnv *env, jclass, + jlong scalar_handle) { + try { + cudf::jni::auto_set_device(env); + using ScalarType = cudf::scalar_type_t<__int128_t>; + auto s = reinterpret_cast(scalar_handle); + auto val = s->value(); + jbyte const *ptr = reinterpret_cast(&val); + cudf::jni::native_jbyteArray jbytes{env, ptr, sizeof(__int128_t)}; + return jbytes.get_jArray(); + } + CATCH_STD(env, 0); +} + JNIEXPORT jbyteArray JNICALL Java_ai_rapids_cudf_Scalar_getUTF8(JNIEnv *env, jclass, jlong scalar_handle) { try { @@ -455,6 +469,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Scalar_makeDecimal64Scalar(JNIEnv *e CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Scalar_makeDecimal128Scalar(JNIEnv *env, jclass, + jbyteArray value, + jint scale, + jboolean is_valid) { + try { + cudf::jni::auto_set_device(env); + auto const scale_ = 
numeric::scale_type{static_cast(scale)}; + cudf::jni::native_jbyteArray jbytes{env, value}; + auto const value_ = reinterpret_cast<__int128_t *>(jbytes.data()); + std::unique_ptr s = + cudf::make_fixed_point_scalar(*value_, scale_); + s->set_valid_async(is_valid); + return reinterpret_cast(s.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Scalar_binaryOpSV(JNIEnv *env, jclass, jlong lhs_ptr, jlong rhs_view, jint int_op, jint out_dtype, jint scale) { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index c66cf13a5ae..a78d40a58f7 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1455,7 +1455,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetEnd(JNIEnv *env, jc JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC( JNIEnv *env, jclass, jobjectArray filter_col_names, jstring inputfilepath, jlong buffer, - jlong buffer_length, jboolean usingNumPyTypes, jint unit) { + jlong buffer_length, jboolean usingNumPyTypes, jint unit, jobjectArray dec128_col_names) { bool read_buffer = true; if (buffer == 0) { JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", NULL); @@ -1478,6 +1478,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC( cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + cudf::jni::native_jstringArray n_dec128_col_names(env, dec128_col_names); + std::unique_ptr source; if (read_buffer) { source.reset(new cudf::io::source_info(reinterpret_cast(buffer), buffer_length)); @@ -1491,6 +1493,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC( .use_index(false) .use_np_dtypes(static_cast(usingNumPyTypes)) .timestamp_type(cudf::data_type(static_cast(unit))) + .decimal128_columns(n_dec128_col_names.as_cpp_vector()) .build(); cudf::io::table_with_metadata result = cudf::io::read_orc(opts); return cudf::jni::convert_table_for_return(env, 
result.tbl); diff --git a/java/src/main/native/src/dtype_utils.hpp b/java/src/main/native/src/dtype_utils.hpp index 9fae0c585e6..53108ee7268 100644 --- a/java/src/main/native/src/dtype_utils.hpp +++ b/java/src/main/native/src/dtype_utils.hpp @@ -45,7 +45,8 @@ inline cudf::data_type timestamp_to_duration(cudf::data_type dt) { } inline bool is_decimal_type(cudf::type_id n_type) { - return n_type == cudf::type_id::DECIMAL32 || n_type == cudf::type_id::DECIMAL64; + return n_type == cudf::type_id::DECIMAL32 || n_type == cudf::type_id::DECIMAL64 || + n_type == cudf::type_id::DECIMAL128; } // create data_type including scale for decimal type diff --git a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java index df4afb5ff60..894861b8c44 100644 --- a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java +++ b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java @@ -22,6 +22,7 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.util.Arrays; import java.util.stream.IntStream; @@ -54,7 +55,12 @@ public class BinaryOpTest extends CudfTestBase { private static final int[] DECIMAL32_1 = new int[]{1000, 2000, 3000, 4000, 5000}; private static final int[] DECIMAL32_2 = new int[]{100, 200, 300, 400, 50}; private static final long[] DECIMAL64_1 = new long[]{10L, 23L, 12L, 24L, 123456789L}; - private static final long[] DECIMAL64_2 = new long[]{20L, 13L, 22L, 14L, 132457689L}; + private static final long[] DECIMAL64_2 = new long[]{33041L, 97290L, 36438L, 25379L, 48473L}; + + private static final BigInteger[] DECIMAL128_1 = new BigInteger[]{new BigInteger("1234567891234567"), new BigInteger("1234567891234567"), + new BigInteger("1234567891234567"), new BigInteger("1234567891234567"), new BigInteger("1234567891234567")}; + private static final BigInteger[] DECIMAL128_2 = new BigInteger[]{new BigInteger("234567891234567"), new 
BigInteger("234567891234567"), + new BigInteger("234567891234567"), new BigInteger("234567891234567"), new BigInteger("234567891234567")}; private static final BigDecimal[] BIGDECIMAL32_1 = new BigDecimal[]{ BigDecimal.valueOf(12, dec32Scale_1), @@ -250,7 +256,9 @@ public void testAdd() { ColumnVector dec32cv1 = ColumnVector.fromDecimals(BIGDECIMAL32_1); ColumnVector dec32cv2 = ColumnVector.fromDecimals(BIGDECIMAL32_2); ColumnVector dec64cv1 = ColumnVector.decimalFromLongs(-dec64Scale_1, DECIMAL64_1); - ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2)) { + ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2); + ColumnVector dec128cv1 = ColumnVector.decimalFromBigInt(-dec64Scale_1, DECIMAL128_1); + ColumnVector dec128cv2 = ColumnVector.decimalFromBigInt(-dec64Scale_2, DECIMAL128_2)) { try (ColumnVector add = icv1.add(icv2); ColumnVector expected = forEach(DType.INT32, icv1, icv2, (b, l, r, i) -> b.append(l.getInt(i) + r.getInt(i)))) { @@ -331,6 +339,14 @@ public void testAdd() { } } + try (ColumnVector add = dec128cv1.add(dec128cv2)) { + try (ColumnVector expected = forEach( + DType.create(DType.DTypeEnum.DECIMAL128, -6), dec128cv1, dec128cv2, + (b, l, r, i) -> b.append(l.getBigDecimal(i).add(r.getBigDecimal(i))))) { + assertColumnsAreEqual(expected, add, "dec128"); + } + } + try (Scalar s = Scalar.fromDecimal(2, 100); ColumnVector add = dec32cv1.add(s)) { try (ColumnVector expected = forEachS( @@ -381,7 +397,9 @@ public void testSub() { ColumnVector dec32cv1 = ColumnVector.fromDecimals(BIGDECIMAL32_1); ColumnVector dec32cv2 = ColumnVector.fromDecimals(BIGDECIMAL32_2); ColumnVector dec64cv1 = ColumnVector.decimalFromLongs(-dec64Scale_1, DECIMAL64_1); - ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2)) { + ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2); + ColumnVector dec128cv1 = ColumnVector.decimalFromBigInt(-dec64Scale_1, DECIMAL128_1); 
+ ColumnVector dec128cv2 = ColumnVector.decimalFromBigInt(-dec64Scale_2, DECIMAL128_2)) { try (ColumnVector sub = icv1.sub(icv2); ColumnVector expected = forEach(DType.INT32, icv1, icv2, (b, l, r, i) -> b.append(l.getInt(i) - r.getInt(i)))) { @@ -473,6 +491,14 @@ public void testSub() { } } + try (ColumnVector sub = dec128cv1.sub(dec128cv2)) { + try (ColumnVector expected = forEach( + DType.create(DType.DTypeEnum.DECIMAL128, -6), dec128cv1, dec128cv2, + (b, l, r, i) -> b.append(l.getBigDecimal(i).subtract(r.getBigDecimal(i))))) { + assertColumnsAreEqual(expected, sub, "dec128"); + } + } + try (Scalar s = Scalar.fromFloat(1.1f); ColumnVector sub = lcv1.sub(s); ColumnVector expected = forEachS(DType.FLOAT32, lcv1, 1.1f, @@ -507,7 +533,9 @@ public void testMul() { ColumnVector dec32cv1 = ColumnVector.fromDecimals(BIGDECIMAL32_1); ColumnVector dec32cv2 = ColumnVector.fromDecimals(BIGDECIMAL32_2); ColumnVector dec64cv1 = ColumnVector.decimalFromLongs(-dec64Scale_1, DECIMAL64_1); - ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2)) { + ColumnVector dec64cv2 = ColumnVector.decimalFromLongs(-dec64Scale_2, DECIMAL64_2); + ColumnVector dec128cv1 = ColumnVector.decimalFromBigInt(-dec64Scale_1, DECIMAL128_1); + ColumnVector dec128cv2 = ColumnVector.decimalFromBigInt(-dec64Scale_2, DECIMAL128_2)) { try (ColumnVector answer = icv.mul(dcv); ColumnVector expected = forEach(DType.FLOAT64, icv, dcv, (b, l, r, i) -> b.append(l.getInt(i) * r.getDouble(i)))) { @@ -560,6 +588,14 @@ public void testMul() { (b, l, r, i) -> b.append(Short.toUnsignedInt(l) * r.getInt(i)))) { assertColumnsAreEqual(expected, answer, "scalar uint16 * uint32"); } + + try (ColumnVector mul = dec128cv1.mul(dec128cv2)) { + try (ColumnVector expected = forEach( + DType.create(DType.DTypeEnum.DECIMAL128, dec128cv1.type.getScale() + dec128cv2.type.getScale()), dec128cv1, dec128cv2, + (b, l, r, i) -> b.append(l.getBigDecimal(i).multiply(r.getBigDecimal(i))))) { + 
assertColumnsAreEqual(expected, mul, "dec128"); + } + } } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 0d007aa0ed7..b7c276d4956 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -28,6 +28,7 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -1063,6 +1064,21 @@ void roundDecimal() { } } + @Test + void decimal128Cv() { + final int dec32Scale1 = -2; + BigInteger bigInteger1 = new BigInteger("-831457"); + BigInteger bigInteger2 = new BigInteger("14"); + BigInteger bigInteger3 = new BigInteger("152345742357340573405745"); + final BigInteger[] bigInts = new BigInteger[] {bigInteger1, bigInteger2, bigInteger3}; + try (ColumnVector v = ColumnVector.decimalFromBigInt(-dec32Scale1, bigInts)) { + HostColumnVector hostColumnVector = v.copyToHost(); + assertEquals(bigInteger1, hostColumnVector.getBigDecimal(0).unscaledValue()); + assertEquals(bigInteger2, hostColumnVector.getBigDecimal(1).unscaledValue()); + assertEquals(bigInteger3, hostColumnVector.getBigDecimal(2).unscaledValue()); + } + } + @Test void testGetDeviceMemorySizeNonStrings() { try (ColumnVector v0 = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6); @@ -1260,6 +1276,9 @@ void testFromScalarZeroRows() { case DECIMAL64: s = Scalar.fromDecimal(mockScale, 1234567890123456789L); break; + case DECIMAL128: + s = Scalar.fromDecimal(mockScale, new BigInteger("1234567890123456789")); + break; case TIMESTAMP_DAYS: s = Scalar.timestampDaysFromInt(12345); break; @@ -3558,6 +3577,30 @@ void testCastLongToDecimal() { ); } + @Test + void testCastDecimal64ToDecimal128() { + testCastDecimal128(DType.DTypeEnum.DECIMAL64, DType.DTypeEnum.DECIMAL128, 0, + () -> ColumnVector.fromBoxedLongs(1L, -21L, 345L, null, 8008L, 
Long.MIN_VALUE, Long.MAX_VALUE), + () -> ColumnVector.fromDecimals(new BigDecimal(1), new BigDecimal(-21), new BigDecimal(345), + null, new BigDecimal(8008), new BigDecimal(Long.MIN_VALUE), new BigDecimal(Long.MAX_VALUE)), + new BigInteger[]{new BigInteger("1"), new BigInteger("-21"), + new BigInteger("345"), null, new BigInteger("8008"), + new BigInteger(String.valueOf(Long.MIN_VALUE)), + new BigInteger(String.valueOf(Long.MAX_VALUE))} + ); + testCastDecimal128(DType.DTypeEnum.DECIMAL32, DType.DTypeEnum.DECIMAL128, 0, + () -> ColumnVector.fromBoxedInts(1, 21, 345, null, 8008, Integer.MIN_VALUE, Integer.MAX_VALUE), + () -> ColumnVector.decimalFromBigInt(0, new BigInteger("1"), new BigInteger("21"), + new BigInteger("345"), null, new BigInteger("8008"), + new BigInteger(String.valueOf(Integer.MIN_VALUE)), + new BigInteger(String.valueOf(Integer.MAX_VALUE))), + new BigInteger[]{new BigInteger("1"), new BigInteger("21"), + new BigInteger("345"), null, new BigInteger("8008"), + new BigInteger(String.valueOf(Integer.MIN_VALUE)), + new BigInteger(String.valueOf(Integer.MAX_VALUE))} + ); + } + @Test void testCastFloatToDecimal() { testCastNumericToDecimalsAndBack(DType.FLOAT32, true, 0, @@ -3651,6 +3694,26 @@ private static void testCastNumericToDecimalsAndBack(DType sourceType, boolean i } } + private static void testCastDecimal128(DType.DTypeEnum sourceType, DType.DTypeEnum targetType, int scale, + Supplier sourceData, + Supplier returnData, + Object[] unscaledDecimal) { + DType decimalType = DType.create(targetType, scale); + try (ColumnVector sourceColumn = sourceData.get(); + ColumnVector expectedColumn = returnData.get(); + ColumnVector decimalColumn = sourceColumn.castTo(decimalType); + HostColumnVector hostDecimalColumn = decimalColumn.copyToHost(); + ColumnVector returnColumn = decimalColumn.castTo(DType.create(decimalType.typeId, scale))) { + for (int i = 0; i < sourceColumn.rows; i++) { + Object actual = hostDecimalColumn.isNull(i) ? 
null : + (decimalType.typeId == DType.DTypeEnum.DECIMAL128 ? hostDecimalColumn.getBigDecimal(i).unscaledValue() : + ((decimalType.typeId == DType.DTypeEnum.DECIMAL64) ? hostDecimalColumn.getLong(i) : hostDecimalColumn.getInt(i))); + assertEquals(unscaledDecimal[i], actual); + } + assertColumnsAreEqual(expectedColumn, returnColumn); + } + } + @Test void testIsTimestamp() { final String[] TIMESTAMP_STRINGS = { diff --git a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java index 5f4d20dc8e3..c2772520f57 100644 --- a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java @@ -51,6 +51,10 @@ public class DecimalColumnVectorTest extends CudfTestBase { private final BigDecimal[] overflowDecimal64 = new BigDecimal[]{ BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE)}; + private final BigDecimal[] overflowDecimal128 = new BigDecimal[]{ + new BigDecimal("340282367000000000000000000000000000001"), + new BigDecimal("-340282367000000000000000000000000000001")}; + @BeforeAll public static void setup() { for (int i = 0; i < decimal32Zoo.length; i++) { @@ -139,7 +143,8 @@ public void testOverrunningTheBuffer() { @Test public void testDecimalValidation() { // precision overflow - assertThrows(IllegalArgumentException.class, () -> HostColumnVector.fromDecimals(overflowDecimal64)); + assertThrows(IllegalArgumentException.class, () -> HostColumnVector.fromDecimals(overflowDecimal128)); + assertThrows(IllegalArgumentException.class, () -> { try (ColumnVector ignored = ColumnVector.decimalFromInts( -(DType.DECIMAL32_MAX_PRECISION + 1), unscaledDec32Zoo)) { @@ -153,13 +158,13 @@ public void testDecimalValidation() { // precision overflow due to rescaling by min scale assertThrows(IllegalArgumentException.class, () -> { try (ColumnVector ignored = ColumnVector.fromDecimals( - BigDecimal.valueOf(1.23e10), 
BigDecimal.valueOf(1.2e-7))) { + BigDecimal.valueOf(1.23e30), BigDecimal.valueOf(1.2e-7))) { } }); - // exactly hit the MAX_PRECISION_DECIMAL64 after rescaling + // exactly hit the MAX_PRECISION_DECIMAL128 after rescaling assertDoesNotThrow(() -> { try (ColumnVector ignored = ColumnVector.fromDecimals( - BigDecimal.valueOf(1.23e10), BigDecimal.valueOf(1.2e-6))) { + BigDecimal.valueOf(1.23e30), BigDecimal.valueOf(1.2e-6))) { } }); } @@ -170,6 +175,10 @@ public void testDecimalGeneral() { try (ColumnVector cv = ColumnVector.fromDecimals(overflowDecimal32)) { assertEquals(DType.create(DType.DTypeEnum.DECIMAL64, 0), cv.getType()); } + + try (ColumnVector cv = ColumnVector.fromDecimals(overflowDecimal64)) { + assertEquals(DType.create(DType.DTypeEnum.DECIMAL128, 0), cv.getType()); + } // Create DECIMAL64 vector with small values try (ColumnVector cv = ColumnVector.decimalFromLongs(0, 0L)) { try (HostColumnVector hcv = cv.copyToHost()) { diff --git a/java/src/test/java/ai/rapids/cudf/ScalarTest.java b/java/src/test/java/ai/rapids/cudf/ScalarTest.java index 37fd2ecb714..0889363c2d0 100644 --- a/java/src/test/java/ai/rapids/cudf/ScalarTest.java +++ b/java/src/test/java/ai/rapids/cudf/ScalarTest.java @@ -25,6 +25,7 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; +import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -186,6 +187,7 @@ public void testDecimal() { BigDecimal.valueOf(1234, 0), BigDecimal.valueOf(12345678, 2), BigDecimal.valueOf(1234567890123L, 6), + new BigDecimal(new BigInteger("12312341234123412341234123412341234120"), 4) }; for (BigDecimal dec : bigDecimals) { try (Scalar s = Scalar.fromDecimal(dec)) { @@ -194,17 +196,24 @@ public void testDecimal() { assertTrue(s.isValid()); if (dtype.getTypeId() == DType.DTypeEnum.DECIMAL64) { assertEquals(dec.unscaledValue().longValueExact(), s.getLong()); - } else { + } else if (dtype.getTypeId() == DType.DTypeEnum.DECIMAL32) { 
assertEquals(dec.unscaledValue().intValueExact(), s.getInt()); + } else if (dtype.getTypeId() == DType.DTypeEnum.DECIMAL128) { + assertEquals(dec.unscaledValue(), s.getBigDecimal().unscaledValue()); } assertEquals(dec, s.getBigDecimal()); } + try (Scalar s = Scalar.fromDecimal(-dec.scale(), dec.unscaledValue().intValueExact())) { assertEquals(dec, s.getBigDecimal()); } catch (java.lang.ArithmeticException ex) { try (Scalar s = Scalar.fromDecimal(-dec.scale(), dec.unscaledValue().longValueExact())) { assertEquals(dec, s.getBigDecimal()); assertTrue(s.getType().isBackedByLong()); + } catch (java.lang.ArithmeticException e) { + try (Scalar s = Scalar.fromDecimal(-dec.scale(), dec.unscaledValue())) { + assertEquals(dec, s.getBigDecimal()); + } } } } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 280a4d33ae9..4512a08430c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -30,6 +30,8 @@ import ai.rapids.cudf.ast.ColumnReference; import ai.rapids.cudf.ast.CompiledExpression; import ai.rapids.cudf.ast.TableReference; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetFileReader; @@ -45,11 +47,13 @@ import java.io.FileInputStream; import java.io.IOException; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; import static ai.rapids.cudf.ColumnWriterOptions.mapColumn; @@ -224,6 +228,10 @@ public static void assertPartialColumnsAreEqual(HostColumnVectorCore expected, l assertEquals(expected.getLong(expectedRow), cv.getLong(tableRow), "Column " + colName + " Row " + 
tableRow); break; + case DECIMAL128: + assertEquals(expected.getBigDecimal(expectedRow), cv.getBigDecimal(tableRow), + "Column " + colName + " Row " + tableRow); + break; case FLOAT32: assertEqualsWithinPercentage(expected.getFloat(expectedRow), cv.getFloat(tableRow), 0.0001, "Column " + colName + " Row " + tableRow); @@ -3659,6 +3667,97 @@ void testMergeApproxPercentile2() { } } + @Test + void testGroupByMinMaxDecimal() { + try (Table t1 = new Table.TestBuilder() + .column( "1", "1", "1", "1", "2") + .column(0, 1, 3 , 3, 4) + .decimal128Column(-4, RoundingMode.HALF_UP, + new BigInteger("123456789123456789"), + new BigInteger("7979879879879798"), + new BigInteger("17979879879879798"), + new BigInteger("2234563472398472398"), + null) + .build()) { + try (Table result = t1 + .groupBy(GroupByOptions.builder() + .withKeysSorted(true) + .withKeysDescending(false, false) + .build(), 0, 1) + .scan(GroupByScanAggregation.min().onColumn(2), + GroupByScanAggregation.max().onColumn(2)); + Table expected = new Table.TestBuilder() + .column( "1", "1", "1", "1", "2") + .column(0, 1, 3, 3, 4) + .decimal128Column(-4, RoundingMode.HALF_UP, + new BigInteger("123456789123456789"), + new BigInteger("7979879879879798"), + new BigInteger("17979879879879798"), + new BigInteger("17979879879879798"), + null) + .decimal128Column(-4, RoundingMode.HALF_UP, + new BigInteger("123456789123456789"), + new BigInteger("7979879879879798"), + new BigInteger("17979879879879798"), + new BigInteger("2234563472398472398"), + null) + .build()) { + assertTablesAreEqual(expected, result); + } + } + } + + @Test + void testGroupByMinMaxDecimalAgg() { + try (Table t1 = new Table.TestBuilder() + .column(-341142443, 48424546) + .decimal128Column(-2, RoundingMode.HALF_DOWN, + new BigInteger("2978603952268112009"), + new BigInteger("571526248386900094")) + .build()) { + try (Table result = t1 + .groupBy(GroupByOptions.builder() + .build(), 0) + .aggregate(GroupByAggregation.max().onColumn(1)); + Table expected = 
new Table.TestBuilder() + .column(-341142443, 48424546) + .decimal128Column(-2, RoundingMode.HALF_DOWN, + new BigInteger("2978603952268112009"), + new BigInteger("571526248386900094")) + .build()) { + assertTablesAreEqual(expected, result); + } + } + } + + @Test + void testGroupByCountDecimal() { + try (Table t1 = new Table.TestBuilder() + .column( "1", "1", "1", "1", "2") + .column(0, 1, 3 , 3, 4) + .decimal128Column(-4, RoundingMode.HALF_UP, + new BigInteger("123456789123456789"), + new BigInteger("7979879879879798"), + new BigInteger("17979879879879798"), + new BigInteger("2234563472398472398"), + null) + .build()) { + try (Table result = t1 + .groupBy(GroupByOptions.builder() + .withKeysSorted(true) + .withKeysDescending(false, false) + .build(), 0, 1) + .aggregate(GroupByAggregation.count().onColumn(2)); + Table expected = new Table.TestBuilder() + .column( "1", "1", "1", "2") + .column(0, 1, 3, 4) + .column(1, 1, 2, 0) + .build()) { + assertTablesAreEqual(expected, result); + } + } + } + @Test void testGroupByUniqueCount() { try (Table t1 = new Table.TestBuilder() @@ -3680,6 +3779,33 @@ void testGroupByUniqueCount() { } } + @Test + void testOrderByDecimal() { + try (Table t1 = new Table.TestBuilder() + .column( "1", "1", "1", "1") + .column(0, 1, 3 , 3) + .decimal64Column(4, + 123456L, + 124567L, + 125678L, + 126789L) + .build()) { + try (Table sorted = t1.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table expected = new Table.TestBuilder() + .column( "1", "1", "1", "1") + .column( 0, 1, 3, 3) + .decimal64Column(4, + 123456L, + 124567L, + 125678L, + 126789L) + .build()) { + assertTablesAreEqual(expected, sorted); + + } + } + } + @Test void testGroupByUniqueCountNulls() { try (Table t1 = new Table.TestBuilder() @@ -6707,43 +6833,202 @@ void testTableBasedFilter() { } } - private Table getExpectedFileTable() { - return getExpectedFileTable(false, false); + private enum Columns { + BOOL("BOOL"), + INT("INT"), + BYTE("BYTE"), + 
LONG("LONG"), + STRING("STRING"), + FLOAT("FLOAT"), + DOUBLE("DOUBLE"), + DECIMAL64("DECIMAL64"), + DECIMAL128("DECIMAL128"), + STRUCT("STRUCT"), + STRUCT_DEC128("STRUCT_DEC128"), + LIST("LIST"), + LIST_STRUCT("LIST_STRUCT"), + LIST_DEC128("LIST_DEC128"); + + final String name; + + Columns(String columnName) { + this.name = columnName; + } + } + + private static class WriteUtils { + + private static final Map> addColumnFn = Maps.newHashMap(); + + static { + addColumnFn.put(Columns.BOOL, (t) -> t.column(true, false, false, true, false)); + addColumnFn.put(Columns.INT, (t) -> t.column(5, 1, 0, 2, 7)); + addColumnFn.put(Columns.LONG, (t) -> t.column(3l, 9l, 4l, 2l, 20l)); + addColumnFn.put(Columns.BYTE, (t) -> t.column(new Byte[]{2, 3, 4, 5, 9})); + addColumnFn.put(Columns.STRING, (t) -> t.column("this", "is", "a", "test", "string")); + addColumnFn.put(Columns.FLOAT, (t) -> t.column(1.0f, 3.5f, 5.9f, 7.1f, 9.8f)); + addColumnFn.put(Columns.DOUBLE, (t) -> t.column(5.0d, 9.5d, 0.9d, 7.23d, 2.8d)); + addColumnFn.put(Columns.DECIMAL64, (t) -> + t.decimal64Column(-5, 1L, 323L, 12398423L, -231312412L, 239893414231L)); + addColumnFn.put(Columns.DECIMAL128, (t) -> + t.decimal128Column(-10, RoundingMode.UNNECESSARY, BigInteger.ONE, BigInteger.ZERO, + BigInteger.TEN, new BigInteger("100000000000000000000000000000"), + new BigInteger("-1234567890123456789012345678"))); + + BasicType dec64Type = new BasicType(true, DType.create(DType.DTypeEnum.DECIMAL64, 0)); + StructType structType = new StructType(true, + new BasicType(true, DType.INT32), new BasicType(true, DType.STRING), dec64Type); + addColumnFn.put(Columns.STRUCT, (t) -> t.column(structType, + struct(1, "k1", BigDecimal.ONE), + struct(2, "k2", BigDecimal.ZERO), + struct(3, "k3", BigDecimal.TEN), + struct(4, "k4", BigDecimal.valueOf(Long.MAX_VALUE)), + new HostColumnVector.StructData((List) null))); + BasicType dec128Type = new BasicType(true, DType.create(DType.DTypeEnum.DECIMAL128, -5)); + 
addColumnFn.put(Columns.STRUCT_DEC128, (t) -> + t.column(new StructType(false, dec128Type), + struct(BigDecimal.valueOf(Integer.MAX_VALUE, 5)), + struct(BigDecimal.valueOf(Long.MAX_VALUE, 5)), + struct(new BigDecimal("111111111122222222223333333333").setScale(5)), + struct(new BigDecimal("123456789123456789123456789").setScale(5)), + struct((BigDecimal) null))); + + addColumnFn.put(Columns.LIST, (t) -> + t.column(new ListType(false, new BasicType(false, DType.INT32)), + Arrays.asList(1, 2), + Arrays.asList(3, 4), + Arrays.asList(5), + Arrays.asList(6, 7), + Arrays.asList(8, 9, 10))); + addColumnFn.put(Columns.LIST_STRUCT, (t) -> + t.column(new ListType(true, structType), + Arrays.asList(struct(1, "k1", BigDecimal.ONE), struct(2, "k2", BigDecimal.ONE), + struct(3, "k3", BigDecimal.ONE)), + Arrays.asList(struct(4, "k4", BigDecimal.ONE), struct(5, "k5", BigDecimal.ONE)), + Arrays.asList(struct(6, "k6", BigDecimal.ONE)), + Arrays.asList(new HostColumnVector.StructData((List) null)), + (List) null)); + addColumnFn.put(Columns.LIST_DEC128, (t) -> + t.column(new ListType(true, new StructType(false, dec128Type)), + Arrays.asList(struct(BigDecimal.valueOf(Integer.MAX_VALUE, 5)), + struct(BigDecimal.valueOf(Integer.MIN_VALUE, 5))), + Arrays.asList(struct(BigDecimal.valueOf(Long.MAX_VALUE, 5)), + struct(BigDecimal.valueOf(0, 5)), struct(BigDecimal.valueOf(-1, 5))), + Arrays.asList(struct(new BigDecimal("111111111122222222223333333333").setScale(5))), + Arrays.asList(struct(new BigDecimal("123456789123456789123456789").setScale(5))), + Arrays.asList(struct((BigDecimal) null)))); + } + + static TestBuilder addColumn(TestBuilder tb, String colName) { + if (!addColumnFn.containsKey(Columns.valueOf(colName))) { + throw new IllegalArgumentException("Unknown column name: " + colName); + } + return addColumnFn.get(Columns.valueOf(colName)).apply(tb); + } + + static String[] getAllColumns(boolean withDecimal128) { + List columns = Lists.newArrayList( + Columns.BOOL.name, 
Columns.INT.name, Columns.BYTE.name, Columns.LONG.name, + Columns.STRING.name, Columns.FLOAT.name, Columns.DOUBLE.name, Columns.DECIMAL64.name, + Columns.STRUCT.name, Columns.LIST.name, Columns.LIST_STRUCT.name); + if (withDecimal128) { + columns.add(Columns.DECIMAL128.name); + columns.add(Columns.STRUCT_DEC128.name); + columns.add(Columns.LIST_DEC128.name); + } + String[] ret = new String[columns.size()]; + columns.toArray(ret); + return ret; + } + + static String[] getNonNestedColumns(boolean withDecimal128) { + List columns = Lists.newArrayList( + Columns.BOOL.name, Columns.INT.name, Columns.BYTE.name, Columns.LONG.name, + Columns.STRING.name, Columns.FLOAT.name, Columns.DOUBLE.name, Columns.DECIMAL64.name); + if (withDecimal128) { + columns.add(Columns.DECIMAL128.name); + } + String[] ret = new String[columns.size()]; + columns.toArray(ret); + return ret; + } + + static void buildWriterOptions(ColumnWriterOptions.NestedBuilder builder, List columns) { + for (String colName : columns) { + buildWriterOptions(builder, colName); + } + } + + static void buildWriterOptions(ColumnWriterOptions.NestedBuilder builder, String... 
columns) { + for (String colName : columns) { + buildWriterOptions(builder, colName); + } + } + + static void buildWriterOptions(ColumnWriterOptions.NestedBuilder builder, String colName) { + switch (Columns.valueOf(colName)) { + case BOOL: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BYTE: + case STRING: + builder.withColumns(false, colName); + break; + case DECIMAL64: + builder.withDecimalColumn(colName, DType.DECIMAL64_MAX_PRECISION); + break; + case DECIMAL128: + builder.withDecimalColumn(colName, DType.DECIMAL128_MAX_PRECISION); + break; + case STRUCT: + builder.withStructColumn(structBuilder(colName) + .withNullableColumns("ch_int") + .withNullableColumns("ch_str") + .withDecimalColumn("ch_dec64", DType.DECIMAL64_MAX_PRECISION, true) + .build()); + break; + case LIST: + builder.withListColumn(listBuilder(colName, false) + .withNonNullableColumns("ch_int") + .build()); + break; + case LIST_STRUCT: + builder.withListColumn(listBuilder(colName) + .withStructColumn(structBuilder(colName) + .withNullableColumns("ch_int") + .withNullableColumns("ch_str") + .withDecimalColumn("ch_dec64", DType.DECIMAL64_MAX_PRECISION, true) + .build()) + .build()); + break; + case STRUCT_DEC128: + builder.withStructColumn(structBuilder(colName, false) + .withDecimalColumn("ch_dec128", DType.DECIMAL128_MAX_PRECISION, true) + .build()); + break; + case LIST_DEC128: + builder.withListColumn(listBuilder(colName) + .withStructColumn(structBuilder(colName, false) + .withDecimalColumn("ch_dec128", DType.DECIMAL128_MAX_PRECISION, true) + .build()) + .build()); + break; + default: + throw new IllegalArgumentException("should NOT reach here"); + } + } } - private Table getExpectedFileTable(boolean withNestedColumns) { - return getExpectedFileTable(true, true); + private Table getExpectedFileTable(String... 
selectColumns) { + return getExpectedFileTable(Lists.newArrayList(selectColumns)); } - private Table getExpectedFileTable(boolean withStructColumns, boolean withListColumn) { - TestBuilder tb = new TestBuilder() - .column(true, false, false, true, false) - .column(5, 1, 0, 2, 7) - .column(new Byte[]{2, 3, 4, 5, 9}) - .column(3l, 9l, 4l, 2l, 20l) - .column("this", "is", "a", "test", "string") - .column(1.0f, 3.5f, 5.9f, 7.1f, 9.8f) - .column(5.0d, 9.5d, 0.9d, 7.23d, 2.8d); - StructType nestedType = new StructType(true, - new BasicType(false, DType.INT32), new BasicType(false, DType.STRING)); - if (withStructColumns) { - tb.column(nestedType, - struct(1, "k1"), struct(2, "k2"), struct(3, "k3"), - struct(4, "k4"), new HostColumnVector.StructData((List) null)); - } - if (withListColumn) { - tb.column(new ListType(false, new BasicType(false, DType.INT32)), - Arrays.asList(1, 2), - Arrays.asList(3, 4), - Arrays.asList(5), - Arrays.asList(6, 7), - Arrays.asList(8, 9, 10)) - .column(new ListType(false, nestedType), - Arrays.asList(struct(1, "k1"), struct(2, "k2"), struct(3, "k3")), - Arrays.asList(struct(4, "k4"), struct(5, "k5")), - Arrays.asList(struct(6, "k6")), - Arrays.asList(new HostColumnVector.StructData((List) null)), - Arrays.asList()); + private Table getExpectedFileTable(List selectColumns) { + TestBuilder tb = new TestBuilder(); + for (String c : selectColumns) { + WriteUtils.addColumn(tb, c); } return tb.build(); } @@ -6865,21 +7150,10 @@ void testParquetWriteMap() throws IOException { @Test void testParquetWriteToBufferChunkedWithNested() { - ParquetWriterOptions options = ParquetWriterOptions.builder() - .withNullableColumns("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6") - .withStructColumn(structBuilder("_c7") - .withNullableColumns("_c7-1") - .withNullableColumns("_c7-2") - .build()) - .withListColumn(listBuilder("_c8") - .withNullableColumns("c8-1").build()) - .withListColumn(listBuilder("c9") - .withStructColumn(structBuilder("c9-1") - 
.withNullableColumns("c9-1-1") - .withNullableColumns("c9-1-2").build()) - .build()) - .build(); - try (Table table0 = getExpectedFileTable(true); + ParquetWriterOptions.Builder optBuilder = ParquetWriterOptions.builder(); + WriteUtils.buildWriterOptions(optBuilder, WriteUtils.getAllColumns(false)); + ParquetWriterOptions options = optBuilder.build(); + try (Table table0 = getExpectedFileTable(WriteUtils.getAllColumns(false)); MyBufferConsumer consumer = new MyBufferConsumer()) { try (TableWriter writer = Table.writeParquetChunked(options, consumer)) { writer.write(table0); @@ -6896,20 +7170,18 @@ void testParquetWriteToBufferChunkedWithNested() { @Test void testParquetWriteToBufferChunked() { - ParquetWriterOptions options = ParquetWriterOptions.builder() - .withNullableColumns("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6") - .withStructColumn(structBuilder("_c7") - .withNullableColumns("_c7-1") - .withNullableColumns("_c7-2") - .build()) - .build(); - try (Table table0 = getExpectedFileTable(true, false); + ParquetWriterOptions.Builder optBuilder = ParquetWriterOptions.builder(); + List columns = Lists.newArrayList(WriteUtils.getNonNestedColumns(false)); + columns.add(Columns.STRUCT.name); + WriteUtils.buildWriterOptions(optBuilder, columns); + ParquetWriterOptions options = optBuilder.build(); + try (Table table0 = getExpectedFileTable(columns); MyBufferConsumer consumer = new MyBufferConsumer()) { - try (TableWriter writer = Table.writeParquetChunked(options, consumer)) { - writer.write(table0); - writer.write(table0); - writer.write(table0); - } + try (TableWriter writer = Table.writeParquetChunked(options, consumer)) { + writer.write(table0); + writer.write(table0); + writer.write(table0); + } try (Table table1 = Table.readParquet(ParquetOptions.DEFAULT, consumer.buffer, 0, consumer.offset); Table concat = Table.concatenate(table0, table0, table0)) { assertTablesAreEqual(concat, table1); @@ -6987,9 +7259,10 @@ void 
testParquetWriteToFileUncompressedNoStats() throws IOException { @Test void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException { File tempFile = File.createTempFile("test-names-metadata", ".arrow"); - try (Table table0 = getExpectedFileTable()) { + String[] columnNames = WriteUtils.getNonNestedColumns(false); + try (Table table0 = getExpectedFileTable(columnNames)) { ArrowIPCWriterOptions options = ArrowIPCWriterOptions.builder() - .withColumnNames("first", "second", "third", "fourth", "fifth", "sixth", "seventh") + .withColumnNames(columnNames) .build(); try (TableWriter writer = Table.writeArrowIPCChunked(options, tempFile.getAbsoluteFile())) { writer.write(table0); @@ -7016,13 +7289,18 @@ void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException { @Test void testArrowIPCWriteToBufferChunked() { - try (Table table0 = getExpectedFileTable(true); + String[] nonNestedCols = WriteUtils.getNonNestedColumns(false); + List columns = Lists.newArrayList(nonNestedCols); + columns.add(Columns.STRUCT.name); + columns.add(Columns.LIST.name); + columns.add(Columns.LIST_STRUCT.name); + try (Table table0 = getExpectedFileTable(columns); MyBufferConsumer consumer = new MyBufferConsumer()) { ArrowIPCWriterOptions options = ArrowIPCWriterOptions.builder() - .withColumnNames("first", "second", "third", "fourth", "fifth", "sixth", "seventh") - .withColumnNames("eighth", "eighth_id", "eighth_name") - .withColumnNames("ninth") - .withColumnNames("tenth", "child_id", "child_name") + .withColumnNames(nonNestedCols) + .withColumnNames(Columns.STRUCT.name, "int", "str", "dec64") + .withColumnNames(Columns.LIST.name) + .withColumnNames(Columns.LIST_STRUCT.name, "int", "str", "dec64") .build(); try (TableWriter writer = Table.writeArrowIPCChunked(options, consumer)) { writer.write(table0); @@ -7049,9 +7327,12 @@ void testArrowIPCWriteToBufferChunked() { @Test void testORCWriteToBufferChunked() { - try (Table table0 = getExpectedFileTable(true); + String[] 
selectedColumns = WriteUtils.getAllColumns(false); + try (Table table0 = getExpectedFileTable(selectedColumns); MyBufferConsumer consumer = new MyBufferConsumer()) { - ORCWriterOptions opts = createORCWriterOptionsWithNested(); + ORCWriterOptions.Builder builder = ORCWriterOptions.builder(); + WriteUtils.buildWriterOptions(builder, selectedColumns); + ORCWriterOptions opts = builder.build(); try (TableWriter writer = Table.writeORCChunked(opts, consumer)) { writer.write(table0); writer.write(table0); @@ -7067,8 +7348,11 @@ void testORCWriteToBufferChunked() { @Test void testORCWriteToFileChunked() throws IOException { File tempFile = File.createTempFile("test", ".orc"); - try (Table table0 = getExpectedFileTable(true)) { - ORCWriterOptions opts = createORCWriterOptionsWithNested(); + String[] selectedColumns = WriteUtils.getAllColumns(false); + try (Table table0 = getExpectedFileTable(selectedColumns)) { + ORCWriterOptions.Builder builder = ORCWriterOptions.builder(); + WriteUtils.buildWriterOptions(builder, selectedColumns); + ORCWriterOptions opts = builder.build(); try (TableWriter writer = Table.writeORCChunked(opts, tempFile.getAbsoluteFile())) { writer.write(table0); } @@ -7111,7 +7395,7 @@ void testORCWriteMapChunked() throws IOException { @Test void testORCWriteToFile() throws IOException { File tempFile = File.createTempFile("test", ".orc"); - try (Table table0 = getExpectedFileTable()) { + try (Table table0 = getExpectedFileTable(WriteUtils.getNonNestedColumns(false))) { table0.writeORC(tempFile.getAbsoluteFile()); try (Table table1 = Table.readORC(tempFile.getAbsoluteFile())) { assertTablesAreEqual(table0, table1); @@ -7124,12 +7408,11 @@ void testORCWriteToFile() throws IOException { @Test void testORCWriteToFileWithColNames() throws IOException { File tempFile = File.createTempFile("test", ".orc"); - final String[] colNames = new String[]{"bool", "int", "byte","long","str","float","double"}; - try (Table table0 = getExpectedFileTable()) { - 
ORCWriterOptions options = ORCWriterOptions.builder() - .withColumns(true, colNames) - .withMetadata("somekey", "somevalue") - .build(); + String[] colNames = WriteUtils.getNonNestedColumns(false); + try (Table table0 = getExpectedFileTable(colNames)) { + ORCWriterOptions.Builder optBuilder = ORCWriterOptions.builder(); + WriteUtils.buildWriterOptions(optBuilder, colNames); + ORCWriterOptions options = optBuilder.build(); table0.writeORC(options, tempFile.getAbsoluteFile()); ORCOptions opts = ORCOptions.builder().includeColumn(colNames).build(); try (Table table1 = Table.readORC(opts, tempFile.getAbsoluteFile())) { @@ -7140,10 +7423,34 @@ void testORCWriteToFileWithColNames() throws IOException { } } + @Test + void testORCReadAndWriteForDecimal128() throws IOException { + File tempFile = File.createTempFile("test", ".orc"); + String[] colNames = new String[]{Columns.DECIMAL64.name, + Columns.DECIMAL128.name, Columns.STRUCT_DEC128.name, Columns.LIST_DEC128.name}; + try (Table table0 = getExpectedFileTable(colNames)) { + ORCWriterOptions.Builder optBuilder = ORCWriterOptions.builder(); + WriteUtils.buildWriterOptions(optBuilder, colNames); + ORCWriterOptions options = optBuilder.build(); + table0.writeORC(options, tempFile.getAbsoluteFile()); + ORCOptions opts = ORCOptions.builder() + .includeColumn(colNames) + .decimal128Column(Columns.DECIMAL128.name, + String.format("%s.%s", Columns.STRUCT_DEC128.name, "ch_dec128"), + String.format("%s.1.%s", Columns.LIST_DEC128.name, "ch_dec128")) + .build(); + try (Table table1 = Table.readORC(opts, tempFile.getAbsoluteFile())) { + assertTablesAreEqual(table0, table1); + } + } finally { + tempFile.delete(); + } + } + @Test void testORCWriteToFileUncompressed() throws IOException { File tempFileUncompressed = File.createTempFile("test-uncompressed", ".orc"); - try (Table table0 = getExpectedFileTable()) { + try (Table table0 = getExpectedFileTable(WriteUtils.getNonNestedColumns(false))) { String[] colNames = new 
String[table0.getNumberOfColumns()]; Arrays.fill(colNames, ""); ORCWriterOptions opts = ORCWriterOptions.builder() @@ -7249,27 +7556,7 @@ void fixedWidthRowsRoundTrip() { // utility methods to reduce typing - private ORCWriterOptions createORCWriterOptionsWithNested() { - // The column metadata should match the table returned from - // 'getExpectedFileTable(true)'. - return ORCWriterOptions.builder() - .withNullableColumns("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6") - .withStructColumn(structBuilder("_c7") - .withNullableColumns("_c7-1") - .withNullableColumns("_c7-2") - .build()) - .withListColumn(listBuilder("_c8") - .withNullableColumns("_c8-1").build()) - .withListColumn(listBuilder("_c9") - .withStructColumn(structBuilder("_c9-1") - .withNullableColumns("_c9-1-1") - .withNullableColumns("_c9-1-2") - .build()) - .build()) - .build(); - } - - private StructData struct(Object... values) { + private static StructData struct(Object... values) { return new StructData(values); }