Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "[REVIEW] Support fixed-point decimal for HostColumnVector [skip ci]" #6681

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
- PR #6622 Update `to_pandas` api docs
- PR #6623 Add operator overloading to column and clean up error messages
- PR #6635 Add cudf::test::dictionary_column_wrapper class
- PR #6609 Support fixed-point decimal for HostColumnVector

## Bug Fixes

Expand Down
50 changes: 0 additions & 50 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -3777,41 +3775,6 @@ public static ColumnVector timestampNanoSecondsFromLongs(long... values) {
return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendArray(values));
}

/**
* Create a new decimal vector from unscaled values (int array) and scale.
* The created vector is of type DType.DECIMAL32, whose max precision is 9.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static ColumnVector decimalFromInts(int scale, int... values) {
try (HostColumnVector host = HostColumnVector.decimalFromInts(scale, values)) {
return host.copyToDevice();
}
}

/**
* Create a new decimal vector from unscaled values (long array) and scale.
* The created vector is of type DType.DECIMAL64, whose max precision is 18.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static ColumnVector decimalFromLongs(int scale, long... values) {
try (HostColumnVector host = HostColumnVector.decimalFromLongs(scale, values)) {
return host.copyToDevice();
}
}

/**
* Create a new decimal vector from double floats with specific DecimalType and RoundingMode.
* All doubles will be rescaled if necessary, according to scale of input DecimalType and RoundingMode.
* If any overflow occurs in extracting integral part, an IllegalArgumentException will be thrown.
* This API is inefficient because of slow double -> decimal conversion, so it is mainly for testing.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static ColumnVector decimalFromDoubles(DType type, RoundingMode mode, double... values) {
try (HostColumnVector host = HostColumnVector.decimalFromDoubles(type, mode, values)) {
return host.copyToDevice();
}
}

/**
* Create a new string vector from the given values. This API
* supports inline nulls. This is really intended to be used only for testing as
Expand All @@ -3823,19 +3786,6 @@ public static ColumnVector fromStrings(String... values) {
}
}

/**
* Create a new vector from the given values. This API supports inline nulls,
* but is much slower than building from primitive array of unscaledValues.
* Notice:
* 1. All input BigDecimals should share same scale.
* 2. The scale will be zero if all input values are null.
*/
public static ColumnVector fromDecimals(BigDecimal... values) {
try (HostColumnVector hcv = HostColumnVector.fromDecimals(values)) {
return hcv.copyToDevice();
}
}

/**
* Create a new vector from the given values. This API supports inline nulls,
* but is much slower than using a regular array and should really only be used
Expand Down
26 changes: 9 additions & 17 deletions java/src/main/java/ai/rapids/cudf/DType.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

public final class DType {

public static final int DECIMAL32_MAX_PRECISION = 9;
public static final int DECIMAL64_MAX_PRECISION = 18;
public static final int DECIMAL32_MAX_PRECISION = 10;
public static final int DECIMAL64_MAX_PRECISION = 19;

/* enum representing various types. Whenever a new non-decimal type is added please make sure
below sections are updated as well:
Expand Down Expand Up @@ -92,8 +92,6 @@ public enum DTypeEnum {
}

public int getNativeId() { return nativeId; }

public boolean isDecimalType() { return DType.DECIMALS.contains(this); }
}

final DTypeEnum typeId;
Expand Down Expand Up @@ -233,7 +231,7 @@ public String toString() {
* @return DType
*/
public static DType create(DTypeEnum dt) {
if (DType.DECIMALS.contains(dt)) {
if (dt == DTypeEnum.DECIMAL32 || dt == DTypeEnum.DECIMAL64) {
throw new IllegalArgumentException("Could not create a Decimal DType without scale");
}
return DType.fromNative(dt.nativeId, 0);
Expand All @@ -249,7 +247,7 @@ public static DType create(DTypeEnum dt) {
* @return DType
*/
public static DType create(DTypeEnum dt, int scale) {
if (!DType.DECIMALS.contains(dt)) {
if (dt != DTypeEnum.DECIMAL32 && dt != DTypeEnum.DECIMAL64) {
throw new IllegalArgumentException("Could not create a non-Decimal DType with scale");
}
return DType.fromNative(dt.nativeId, scale);
Expand Down Expand Up @@ -321,8 +319,7 @@ public boolean hasTimeResolution() {
* DType.INT32,
* DType.UINT32,
* DType.DURATION_DAYS,
* DType.TIMESTAMP_DAYS,
* DType.DECIMAL32
* DType.TIMESTAMP_DAYS
*/
public boolean isBackedByInt() {
return INTS.contains(this.typeId);
Expand All @@ -340,8 +337,7 @@ public boolean isBackedByInt() {
* DType.TIMESTAMP_SECONDS,
* DType.TIMESTAMP_MILLISECONDS,
* DType.TIMESTAMP_MICROSECONDS,
* DType.TIMESTAMP_NANOSECONDS,
* DType.DECIMAL64
* DType.TIMESTAMP_NANOSECONDS
*/
public boolean isBackedByLong() {
return LONGS.contains(this.typeId);
Expand Down Expand Up @@ -370,7 +366,7 @@ public boolean isBackedByLong() {
* DType.DECIMAL32,
* DType.DECIMAL64
*/
public boolean isDecimalType() { return this.typeId.isDecimalType(); }
public boolean isDecimalType() { return DECIMALS.contains(this.typeId); }

/**
* Returns true for duration types
Expand Down Expand Up @@ -426,18 +422,14 @@ public boolean isTimestampType() {
DTypeEnum.TIMESTAMP_SECONDS,
DTypeEnum.TIMESTAMP_MILLISECONDS,
DTypeEnum.TIMESTAMP_MICROSECONDS,
DTypeEnum.TIMESTAMP_NANOSECONDS,
// The unscaledValue of DECIMAL64 is of type INT64, which means it can be fetched by getLong.
DTypeEnum.DECIMAL64
DTypeEnum.TIMESTAMP_NANOSECONDS
);

private static final EnumSet<DTypeEnum> INTS = EnumSet.of(
DTypeEnum.INT32,
DTypeEnum.UINT32,
DTypeEnum.DURATION_DAYS,
DTypeEnum.TIMESTAMP_DAYS,
// The unscaledValue of DECIMAL32 is of type INT32, which means it can be fetched by getInt.
DTypeEnum.DECIMAL32
DTypeEnum.TIMESTAMP_DAYS
);

private static final EnumSet<DTypeEnum> SHORTS = EnumSet.of(
Expand Down
178 changes: 0 additions & 178 deletions java/src/main/java/ai/rapids/cudf/HostColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@

package ai.rapids.cudf;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Consumer;
Expand Down Expand Up @@ -447,50 +441,6 @@ public static HostColumnVector timestampNanoSecondsFromLongs(long... values) {
return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendArray(values));
}

/**
* Create a new decimal vector from unscaled values (int array) and scale.
* The created vector is of type DType.DECIMAL32, whose max precision is 9.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static HostColumnVector decimalFromInts(int scale, int... values) {
return build(DType.create(DType.DTypeEnum.DECIMAL32, scale), values.length, (b) -> b.appendUnscaledDecimalArray(values));
}

/**
* Create a new decimal vector from unscaled values (long array) and scale.
* The created vector is of type DType.DECIMAL64, whose max precision is 18.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static HostColumnVector decimalFromLongs(int scale, long... values) {
return build(DType.create(DType.DTypeEnum.DECIMAL64, scale), values.length, (b) -> b.appendUnscaledDecimalArray(values));
}

/**
* Create a new decimal vector from double floats with specific DecimalType and RoundingMode.
* All doubles will be rescaled if necessary, according to scale of input DecimalType and RoundingMode.
* If any overflow occurs in extracting integral part, an IllegalArgumentException will be thrown.
* This API is inefficient because of slow double -> decimal conversion, so it is mainly for testing.
* Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
*/
public static HostColumnVector decimalFromDoubles(DType type, RoundingMode mode, double... values) {
assert type.isDecimalType();
if (type.typeId == DType.DTypeEnum.DECIMAL64) {
long[] data = new long[values.length];
for (int i = 0; i < values.length; i++) {
BigDecimal dec = BigDecimal.valueOf(values[i]).setScale(-type.getScale(), mode);
data[i] = dec.unscaledValue().longValueExact();
}
return build(type, values.length, (b) -> b.appendUnscaledDecimalArray(data));
} else {
int[] data = new int[values.length];
for (int i = 0; i < values.length; i++) {
BigDecimal dec = BigDecimal.valueOf(values[i]).setScale(-type.getScale(), mode);
data[i] = dec.unscaledValue().intValueExact();
}
return build(type, values.length, (b) -> b.appendUnscaledDecimalArray(data));
}
}

/**
* Create a new string vector from the given values. This API
* supports inline nulls. This is really intended to be used only for testing as
Expand Down Expand Up @@ -518,30 +468,6 @@ public static HostColumnVector fromStrings(String... values) {
});
}

/**
* Create a new vector from the given values. This API supports inline nulls,
* but is much slower than building from primitive array of unscaledValues.
* Notice:
* 1. Input values will be rescaled with min scale (max scale in terms of java.math.BigDecimal),
* which avoids potential precision loss due to rounding. But there exists risk of precision overflow.
* 2. The scale will be zero if all input values are null.
*/
public static HostColumnVector fromDecimals(BigDecimal... values) {
// 1. Fetch the element with max precision (maxDec). Fill with ZERO if inputs is empty.
// 2. Fetch the max scale. Fill with ZERO if inputs is empty.
// 3. Rescale the maxDec with the max scale, so to come out the max precision capacity we need.
BigDecimal maxDec = Arrays.stream(values).filter(Objects::nonNull)
.max(Comparator.comparingInt(BigDecimal::precision))
.orElse(BigDecimal.ZERO);
int maxScale = Arrays.stream(values)
.map(decimal -> (decimal == null) ? 0 : decimal.scale())
.max(Comparator.naturalOrder())
.orElse(0);
maxDec = maxDec.setScale(maxScale, RoundingMode.UNNECESSARY);

return build(DType.fromJavaBigDecimal(maxDec), values.length, (b) -> b.appendBoxed(values));
}

/**
* Create a new vector from the given values. This API supports inline nulls,
* but is much slower than using a regular array and should really only be used
Expand Down Expand Up @@ -977,8 +903,6 @@ private void appendChildOrNull(ColumnBuilder childBuilder, Object listElement) {
childBuilder.append((Byte) listElement);
} else if (listElement instanceof Short) {
childBuilder.append((Short) listElement);
} else if (listElement instanceof BigDecimal) {
childBuilder.append((BigDecimal) listElement);
} else if (listElement instanceof List) {
childBuilder.append((List) listElement);
} else if (listElement instanceof StructData) {
Expand Down Expand Up @@ -1068,23 +992,6 @@ public final ColumnBuilder append(boolean value) {
return this;
}

public final ColumnBuilder append(BigDecimal value) {
growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
assert currentIndex < rows;
// Rescale input decimal with UNNECESSARY policy, which accepts no precision loss.
BigInteger unscaledVal = value.setScale(-type.getScale(), RoundingMode.UNNECESSARY).unscaledValue();
if (type.typeId == DType.DTypeEnum.DECIMAL32) {
data.setInt(currentIndex * type.getSizeInBytes(), unscaledVal.intValueExact());
} else if (type.typeId == DType.DTypeEnum.DECIMAL64) {
data.setLong(currentIndex * type.getSizeInBytes(), unscaledVal.longValueExact());
} else {
throw new IllegalStateException(type + " is not a supported decimal type.");
}
currentIndex++;
currentByteIndex += type.getSizeInBytes();
return this;
}

public ColumnBuilder append(String value) {
assert value != null : "appendNull must be used to append null strings";
return appendUTF8String(value.getBytes(StandardCharsets.UTF_8));
Expand Down Expand Up @@ -1280,57 +1187,6 @@ public final Builder append(double value) {
return this;
}

/**
* Append java.math.BigDecimal into HostColumnVector with UNNECESSARY RoundingMode.
* Input decimal should have a larger scale than column vector.Otherwise, an ArithmeticException will be thrown while rescaling.
* If unscaledValue after rescaling exceeds the max precision of rapids type,
* an ArithmeticException will be thrown while extracting integral.
*
* @param value BigDecimal value to be appended
*/
public final Builder append(BigDecimal value) {
return append(value, RoundingMode.UNNECESSARY);
}

/**
* Append java.math.BigDecimal into HostColumnVector with user-defined RoundingMode.
* Input decimal will be rescaled according to scale of column type and RoundingMode before appended.
* If unscaledValue after rescaling exceeds the max precision of rapids type, an ArithmeticException will be thrown.
*
* @param value BigDecimal value to be appended
* @param roundingMode rounding mode determines rescaling behavior
*/
public final Builder append(BigDecimal value, RoundingMode roundingMode) {
assert type.isDecimalType();
assert currentIndex < rows;
BigInteger unscaledValue = value.setScale(-type.getScale(), roundingMode).unscaledValue();
if (type.typeId == DType.DTypeEnum.DECIMAL32) {
data.setInt(currentIndex * type.getSizeInBytes(), unscaledValue.intValueExact());
} else if (type.typeId == DType.DTypeEnum.DECIMAL64) {
data.setLong(currentIndex * type.getSizeInBytes(), unscaledValue.longValueExact());
} else {
throw new IllegalStateException(type + " is not a supported decimal type.");
}
currentIndex++;
return this;
}

public final Builder appendUnscaledDecimal(int value) {
assert type.typeId == DType.DTypeEnum.DECIMAL32;
assert currentIndex < rows;
data.setInt(currentIndex * type.getSizeInBytes(), value);
currentIndex++;
return this;
}

public final Builder appendUnscaledDecimal(long value) {
assert type.typeId == DType.DTypeEnum.DECIMAL64;
assert currentIndex < rows;
data.setLong(currentIndex * type.getSizeInBytes(), value);
currentIndex++;
return this;
}

public Builder append(String value) {
assert value != null : "appendNull must be used to append null strings";
return appendUTF8String(value.getBytes(StandardCharsets.UTF_8));
Expand Down Expand Up @@ -1427,40 +1283,6 @@ public Builder appendArray(double... values) {
return this;
}

public Builder appendUnscaledDecimalArray(int... values) {
assert type.typeId == DType.DTypeEnum.DECIMAL32;
assert (values.length + currentIndex) <= rows;
data.setInts(currentIndex * type.getSizeInBytes(), values, 0, values.length);
currentIndex += values.length;
return this;
}

public Builder appendUnscaledDecimalArray(long... values) {
assert type.typeId == DType.DTypeEnum.DECIMAL64;
assert (values.length + currentIndex) <= rows;
data.setLongs(currentIndex * type.getSizeInBytes(), values, 0, values.length);
currentIndex += values.length;
return this;
}

/**
* Append multiple values. This is very slow and should really only be used for tests.
* @param values the values to append, including nulls.
* @return this for chaining.
* @throws {@link IndexOutOfBoundsException}
*/
public Builder appendBoxed(BigDecimal... values) throws IndexOutOfBoundsException {
assert type.isDecimalType();
for (BigDecimal v : values) {
if (v == null) {
appendNull();
} else {
append(v);
}
}
return this;
}

/**
* Append multiple values. This is very slow and should really only be used for tests.
* @param values the values to append, including nulls.
Expand Down
Loading