From a9af8b6da6e2694f6e5825fc53e1084895980032 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Mon, 21 Feb 2022 19:08:28 +0800 Subject: [PATCH] support appending Decimal128 in terms of byte array --- .../java/ai/rapids/cudf/HostColumnVector.java | 17 ++++++++++++++ .../ai/rapids/cudf/ColumnBuilderHelper.java | 12 ++++++++++ .../rapids/cudf/DecimalColumnVectorTest.java | 22 +++++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java index 3abc6db385d..9392bb5c336 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java @@ -1344,6 +1344,23 @@ public ColumnBuilder appendUTF8String(byte[] value, int srcOffset, int length) { return this; } + /** + * Appends a Decimal128 value given as a byte array that contains the two's-complement representation + * of the unscaled value in big-endian byte order. The bytes are transformed into the representation of + * cuDF Decimal128 before being appended. + * This method is more efficient than `append(BigInteger unscaledVal)` if the two's-complement + * representation of the unscaled value can be accessed directly, without encoding via the method `toByteArray`. 
+ */ + public ColumnBuilder appendDecimal128(byte[] binary) { + growFixedWidthBuffersAndRows(); + assert type.getTypeId().equals(DType.DTypeEnum.DECIMAL128); + assert currentIndex < rows; + assert binary.length <= type.getSizeInBytes(); + byte[] cuBinary = convertDecimal128FromJavaToCudf(binary); + data.setBytes(currentIndex++ << bitShiftBySize, cuBinary, 0, cuBinary.length); + return this; + } + public ColumnBuilder getChild(int index) { return childBuilders.get(index); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java b/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java index 263244b2413..679386c00b6 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java @@ -18,6 +18,7 @@ package ai.rapids.cudf; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.util.Arrays; import java.util.Comparator; @@ -49,6 +50,17 @@ public static ColumnVector buildOnDevice( } } + public static HostColumnVector decimalFromBigInts(int scale, BigInteger... values) { + return ColumnBuilderHelper.build( + new HostColumnVector.BasicType(true, DType.create(DType.DTypeEnum.DECIMAL128, -scale)), + values.length, + (b) -> { + for (BigInteger v : values) + if (v == null) b.appendNull(); + else b.appendDecimal128(v.toByteArray()); + }); + } + public static HostColumnVector fromBoxedBytes(boolean signed, Byte... values) { DType dt = signed ? 
DType.INT8 : DType.UINT8; return ColumnBuilderHelper.build( diff --git a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java index 994066c5df0..87d01c6a4e0 100644 --- a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java @@ -42,6 +42,7 @@ public class DecimalColumnVectorTest extends CudfTestBase { private static final BigDecimal[] decimal128Zoo = new BigDecimal[20]; private static final int[] unscaledDec32Zoo = new int[decimal32Zoo.length]; private static final long[] unscaledDec64Zoo = new long[decimal64Zoo.length]; + private static final BigInteger[] unscaledDec128Zoo = new BigInteger[decimal128Zoo.length]; private final BigDecimal[] boundaryDecimal32 = new BigDecimal[]{ new BigDecimal("999999999"), new BigDecimal("-999999999")}; @@ -67,6 +68,7 @@ public static void setup() { for (int i = 0; i < decimal32Zoo.length; i++) { unscaledDec32Zoo[i] = rdSeed.nextInt() / 100; unscaledDec64Zoo[i] = rdSeed.nextLong() / 100; + unscaledDec128Zoo[i] = BigInteger.valueOf(rdSeed.nextLong()).multiply(BigInteger.valueOf(rdSeed.nextLong())); if (rdSeed.nextBoolean()) { // Create BigDecimal with slight variance on scale, in order to test building cv from inputs with different scales. 
decimal32Zoo[i] = BigDecimal.valueOf(rdSeed.nextInt() / 100, dec32Scale - rdSeed.nextInt(2)); @@ -245,7 +247,7 @@ private static void testDecimalImpl(DType.DTypeEnum decimalType, int scale, BigD } @Test - private void testDecimalFromInts() { + public void testDecimalFromInts() { try (ColumnVector cv = ColumnVector.decimalFromInts(-DecimalColumnVectorTest.dec32Scale, DecimalColumnVectorTest.unscaledDec32Zoo)) { try (HostColumnVector hcv = cv.copyToHost()) { for (int i = 0; i < DecimalColumnVectorTest.unscaledDec32Zoo.length; i++) { @@ -257,7 +259,7 @@ private void testDecimalFromInts() { } @Test - private static void testDecimalFromLongs() { + public void testDecimalFromLongs() { try (ColumnVector cv = ColumnVector.decimalFromLongs(-DecimalColumnVectorTest.dec64Scale, DecimalColumnVectorTest.unscaledDec64Zoo)) { try (HostColumnVector hcv = cv.copyToHost()) { for (int i = 0; i < DecimalColumnVectorTest.unscaledDec64Zoo.length; i++) { @@ -268,6 +270,22 @@ private static void testDecimalFromLongs() { } } + @Test + public void testDecimalFromBigInts() { + try (ColumnVector cv = ColumnVector.decimalFromBigInt(-DecimalColumnVectorTest.dec128Scale, DecimalColumnVectorTest.unscaledDec128Zoo)) { + try (HostColumnVector hcv = cv.copyToHost()) { + for (int i = 0; i < DecimalColumnVectorTest.unscaledDec128Zoo.length; i++) { + assertEquals(DecimalColumnVectorTest.unscaledDec128Zoo[i], hcv.getBigDecimal(i).unscaledValue()); + } + } + } + try (HostColumnVector hcv = ColumnBuilderHelper.decimalFromBigInts(-DecimalColumnVectorTest.dec128Scale, DecimalColumnVectorTest.unscaledDec128Zoo)) { + for (int i = 0; i < DecimalColumnVectorTest.unscaledDec128Zoo.length; i++) { + assertEquals(DecimalColumnVectorTest.unscaledDec128Zoo[i], hcv.getBigDecimal(i).unscaledValue()); + } + } + } + @Test public void testDecimalFromDoubles() { DType dt = DType.create(DType.DTypeEnum.DECIMAL32, -3);