rapidsai · sperlingxx · Nov 5, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -53,7 +53,6 @@
 - PR #6622 Update `to_pandas` api docs
 - PR #6623 Add operator overloading to column and clean up error messages
 - PR #6635 Add cudf::test::dictionary_column_wrapper class
-- PR #6609 Support fixed-point decimal for HostColumnVector
 
 ## Bug Fixes
 

@@ -24,8 +24,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.math.BigDecimal;
-import java.math.RoundingMode;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
@@ -3777,41 +3775,6 @@ public static ColumnVector timestampNanoSecondsFromLongs(long... values) {
     return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendArray(values));
   }
 
-  /**
-   * Create a new decimal vector from unscaled values (int array) and scale.
-   * The created vector is of type DType.DECIMAL32, whose max precision is 9.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static ColumnVector decimalFromInts(int scale, int... values) {
-    try (HostColumnVector host = HostColumnVector.decimalFromInts(scale, values)) {
-      return host.copyToDevice();
-    }
-  }
-
-  /**
-   * Create a new decimal vector from unscaled values (long array) and scale.
-   * The created vector is of type DType.DECIMAL64, whose max precision is 18.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static ColumnVector decimalFromLongs(int scale, long... values) {
-    try (HostColumnVector host = HostColumnVector.decimalFromLongs(scale, values)) {
-      return host.copyToDevice();
-    }
-  }
-
-  /**
-   * Create a new decimal vector from double floats with specific DecimalType and RoundingMode.
-   * All doubles will be rescaled if necessary, according to scale of input DecimalType and RoundingMode.
-   * If any overflow occurs in extracting integral part, an IllegalArgumentException will be thrown.
-   * This API is inefficient because of slow double -> decimal conversion, so it is mainly for testing.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static ColumnVector decimalFromDoubles(DType type, RoundingMode mode, double... values) {
-    try (HostColumnVector host = HostColumnVector.decimalFromDoubles(type, mode, values)) {
-      return host.copyToDevice();
-    }
-  }
-
   /**
    * Create a new string vector from the given values.  This API
    * supports inline nulls. This is really intended to be used only for testing as
@@ -3823,19 +3786,6 @@ public static ColumnVector fromStrings(String... values) {
     }
   }
 
-  /**
-   * Create a new vector from the given values.  This API supports inline nulls,
-   * but is much slower than building from primitive array of unscaledValues.
-   * Notice:
-   *  1. All input BigDecimals should share same scale.
-   *  2. The scale will be zero if all input values are null.
-   */
-  public static ColumnVector fromDecimals(BigDecimal... values) {
-    try (HostColumnVector hcv = HostColumnVector.fromDecimals(values)) {
-      return hcv.copyToDevice();
-    }
-  }
-
   /**
    * Create a new vector from the given values.  This API supports inline nulls,
    * but is much slower than using a regular array and should really only be used

@@ -21,8 +21,8 @@
 
 public final class DType {
 
-  public static final int DECIMAL32_MAX_PRECISION = 9;
-  public static final int DECIMAL64_MAX_PRECISION = 18;
+  public static final int DECIMAL32_MAX_PRECISION = 10;
+  public static final int DECIMAL64_MAX_PRECISION = 19;
 
   /* enum representing various types. Whenever a new non-decimal type is added please make sure
   below sections are updated as well:
@@ -92,8 +92,6 @@ public enum DTypeEnum {
     }
 
     public int getNativeId() { return nativeId; }
-
-    public boolean isDecimalType() { return DType.DECIMALS.contains(this); }
   }
 
   final DTypeEnum typeId;
@@ -233,7 +231,7 @@ public String toString() {
    * @return DType
    */
   public static DType create(DTypeEnum dt) {
-    if (DType.DECIMALS.contains(dt)) {
+    if (dt == DTypeEnum.DECIMAL32 || dt == DTypeEnum.DECIMAL64) {
       throw new IllegalArgumentException("Could not create a Decimal DType without scale");
     }
     return DType.fromNative(dt.nativeId, 0);
@@ -249,7 +247,7 @@ public static DType create(DTypeEnum dt) {
    * @return DType
    */
   public static DType create(DTypeEnum dt, int scale) {
-    if (!DType.DECIMALS.contains(dt)) {
+    if (dt != DTypeEnum.DECIMAL32 && dt != DTypeEnum.DECIMAL64) {
       throw new IllegalArgumentException("Could not create a non-Decimal DType with scale");
     }
     return DType.fromNative(dt.nativeId, scale);
@@ -321,8 +319,7 @@ public boolean hasTimeResolution() {
    *       DType.INT32,
    *       DType.UINT32,
    *       DType.DURATION_DAYS,
-   *       DType.TIMESTAMP_DAYS,
-   *       DType.DECIMAL32
+   *       DType.TIMESTAMP_DAYS
    */
   public boolean isBackedByInt() {
     return INTS.contains(this.typeId);
@@ -340,8 +337,7 @@ public boolean isBackedByInt() {
    *       DType.TIMESTAMP_SECONDS,
    *       DType.TIMESTAMP_MILLISECONDS,
    *       DType.TIMESTAMP_MICROSECONDS,
-   *       DType.TIMESTAMP_NANOSECONDS,
-   *       DType.DECIMAL64
+   *       DType.TIMESTAMP_NANOSECONDS
    */
   public boolean isBackedByLong() {
     return LONGS.contains(this.typeId);
@@ -370,7 +366,7 @@ public boolean isBackedByLong() {
    *       DType.DECIMAL32,
    *       DType.DECIMAL64
    */
-  public boolean isDecimalType() { return this.typeId.isDecimalType(); }
+  public boolean isDecimalType() { return DECIMALS.contains(this.typeId); }
 
   /**
    * Returns true for duration types
@@ -426,18 +422,14 @@ public boolean isTimestampType() {
       DTypeEnum.TIMESTAMP_SECONDS,
       DTypeEnum.TIMESTAMP_MILLISECONDS,
       DTypeEnum.TIMESTAMP_MICROSECONDS,
-      DTypeEnum.TIMESTAMP_NANOSECONDS,
-      // The unscaledValue of DECIMAL64 is of type INT64, which means it can be fetched by getLong.
-      DTypeEnum.DECIMAL64
+      DTypeEnum.TIMESTAMP_NANOSECONDS
   );
 
   private static final EnumSet<DTypeEnum> INTS = EnumSet.of(
       DTypeEnum.INT32,
       DTypeEnum.UINT32,
       DTypeEnum.DURATION_DAYS,
-      DTypeEnum.TIMESTAMP_DAYS,
-      // The unscaledValue of DECIMAL32 is of type INT32, which means it can be fetched by getInt.
-      DTypeEnum.DECIMAL32
+      DTypeEnum.TIMESTAMP_DAYS
   );
 
   private static final EnumSet<DTypeEnum> SHORTS = EnumSet.of(

@@ -18,15 +18,9 @@
 
 package ai.rapids.cudf;
 
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.math.RoundingMode;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.List;
-import java.util.Objects;
 import java.util.Optional;
 import java.util.StringJoiner;
 import java.util.function.Consumer;
@@ -447,50 +441,6 @@ public static HostColumnVector timestampNanoSecondsFromLongs(long... values) {
     return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendArray(values));
   }
 
-  /**
-   * Create a new decimal vector from unscaled values (int array) and scale.
-   * The created vector is of type DType.DECIMAL32, whose max precision is 9.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static HostColumnVector decimalFromInts(int scale, int... values) {
-    return build(DType.create(DType.DTypeEnum.DECIMAL32, scale), values.length, (b) -> b.appendUnscaledDecimalArray(values));
-  }
-
-  /**
-   * Create a new decimal vector from unscaled values (long array) and scale.
-   * The created vector is of type DType.DECIMAL64, whose max precision is 18.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static HostColumnVector decimalFromLongs(int scale, long... values) {
-    return build(DType.create(DType.DTypeEnum.DECIMAL64, scale), values.length, (b) -> b.appendUnscaledDecimalArray(values));
-  }
-
-  /**
-   * Create a new decimal vector from double floats with specific DecimalType and RoundingMode.
-   * All doubles will be rescaled if necessary, according to scale of input DecimalType and RoundingMode.
-   * If any overflow occurs in extracting integral part, an IllegalArgumentException will be thrown.
-   * This API is inefficient because of slow double -> decimal conversion, so it is mainly for testing.
-   * Compared with scale of [[java.math.BigDecimal]], the scale here represents the opposite meaning.
-   */
-  public static HostColumnVector decimalFromDoubles(DType type, RoundingMode mode, double... values) {
-    assert type.isDecimalType();
-    if (type.typeId == DType.DTypeEnum.DECIMAL64) {
-      long[] data = new long[values.length];
-      for (int i = 0; i < values.length; i++) {
-        BigDecimal dec = BigDecimal.valueOf(values[i]).setScale(-type.getScale(), mode);
-        data[i] = dec.unscaledValue().longValueExact();
-      }
-      return build(type, values.length, (b) -> b.appendUnscaledDecimalArray(data));
-    } else {
-      int[] data = new int[values.length];
-      for (int i = 0; i < values.length; i++) {
-        BigDecimal dec = BigDecimal.valueOf(values[i]).setScale(-type.getScale(), mode);
-        data[i] = dec.unscaledValue().intValueExact();
-      }
-      return build(type, values.length, (b) -> b.appendUnscaledDecimalArray(data));
-    }
-  }
-
   /**
    * Create a new string vector from the given values.  This API
    * supports inline nulls. This is really intended to be used only for testing as
@@ -518,30 +468,6 @@ public static HostColumnVector fromStrings(String... values) {
     });
   }
 
-  /**
-   * Create a new vector from the given values.  This API supports inline nulls,
-   * but is much slower than building from primitive array of unscaledValues.
-   * Notice:
-   *  1. Input values will be rescaled with min scale (max scale in terms of java.math.BigDecimal),
-   *  which avoids potential precision loss due to rounding. But there exists risk of precision overflow.
-   *  2. The scale will be zero if all input values are null.
-   */
-  public static HostColumnVector fromDecimals(BigDecimal... values) {
-    // 1. Fetch the element with max precision (maxDec). Fill with ZERO if inputs is empty.
-    // 2. Fetch the max scale. Fill with ZERO if inputs is empty.
-    // 3. Rescale the maxDec with the max scale, so to come out the max precision capacity we need.
-    BigDecimal maxDec = Arrays.stream(values).filter(Objects::nonNull)
-        .max(Comparator.comparingInt(BigDecimal::precision))
-        .orElse(BigDecimal.ZERO);
-    int maxScale = Arrays.stream(values)
-        .map(decimal -> (decimal == null) ? 0 : decimal.scale())
-        .max(Comparator.naturalOrder())
-        .orElse(0);
-    maxDec = maxDec.setScale(maxScale, RoundingMode.UNNECESSARY);
-
-    return build(DType.fromJavaBigDecimal(maxDec), values.length, (b) -> b.appendBoxed(values));
-  }
-
   /**
    * Create a new vector from the given values.  This API supports inline nulls,
    * but is much slower than using a regular array and should really only be used
@@ -977,8 +903,6 @@ private void appendChildOrNull(ColumnBuilder childBuilder, Object listElement) {
         childBuilder.append((Byte) listElement);
       } else if (listElement instanceof Short) {
         childBuilder.append((Short) listElement);
-      } else if (listElement instanceof BigDecimal) {
-        childBuilder.append((BigDecimal) listElement);
       } else if (listElement instanceof List) {
         childBuilder.append((List) listElement);
       } else if (listElement instanceof StructData) {
@@ -1068,23 +992,6 @@ public final ColumnBuilder append(boolean value) {
       return this;
     }
 
-    public final ColumnBuilder append(BigDecimal value) {
-      growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
-      assert currentIndex < rows;
-      // Rescale input decimal with UNNECESSARY policy, which accepts no precision loss.
-      BigInteger unscaledVal = value.setScale(-type.getScale(), RoundingMode.UNNECESSARY).unscaledValue();
-      if (type.typeId == DType.DTypeEnum.DECIMAL32) {
-        data.setInt(currentIndex * type.getSizeInBytes(), unscaledVal.intValueExact());
-      } else if (type.typeId == DType.DTypeEnum.DECIMAL64) {
-        data.setLong(currentIndex * type.getSizeInBytes(), unscaledVal.longValueExact());
-      } else {
-        throw new IllegalStateException(type + " is not a supported decimal type.");
-      }
-      currentIndex++;
-      currentByteIndex += type.getSizeInBytes();
-      return this;
-    }
-
     public ColumnBuilder append(String value) {
       assert value != null : "appendNull must be used to append null strings";
       return appendUTF8String(value.getBytes(StandardCharsets.UTF_8));
@@ -1280,57 +1187,6 @@ public final Builder append(double value) {
       return this;
     }
 
-    /**
-     * Append java.math.BigDecimal into HostColumnVector with UNNECESSARY RoundingMode.
-     * Input decimal should have a larger scale than column vector.Otherwise, an ArithmeticException will be thrown while rescaling.
-     * If unscaledValue after rescaling exceeds the max precision of rapids type,
-     * an ArithmeticException will be thrown while extracting integral.
-     *
-     * @param value BigDecimal value to be appended
-     */
-    public final Builder append(BigDecimal value) {
-      return append(value, RoundingMode.UNNECESSARY);
-    }
-
-    /**
-     * Append java.math.BigDecimal into HostColumnVector with user-defined RoundingMode.
-     * Input decimal will be rescaled according to scale of column type and RoundingMode before appended.
-     * If unscaledValue after rescaling exceeds the max precision of rapids type, an ArithmeticException will be thrown.
-     *
-     * @param value        BigDecimal value to be appended
-     * @param roundingMode rounding mode determines rescaling behavior
-     */
-    public final Builder append(BigDecimal value, RoundingMode roundingMode) {
-      assert type.isDecimalType();
-      assert currentIndex < rows;
-      BigInteger unscaledValue = value.setScale(-type.getScale(), roundingMode).unscaledValue();
-      if (type.typeId == DType.DTypeEnum.DECIMAL32) {
-        data.setInt(currentIndex * type.getSizeInBytes(), unscaledValue.intValueExact());
-      } else if (type.typeId == DType.DTypeEnum.DECIMAL64) {
-        data.setLong(currentIndex * type.getSizeInBytes(), unscaledValue.longValueExact());
-      } else {
-        throw new IllegalStateException(type + " is not a supported decimal type.");
-      }
-      currentIndex++;
-      return this;
-    }
-
-    public final Builder appendUnscaledDecimal(int value) {
-      assert type.typeId == DType.DTypeEnum.DECIMAL32;
-      assert currentIndex < rows;
-      data.setInt(currentIndex * type.getSizeInBytes(), value);
-      currentIndex++;
-      return this;
-    }
-
-    public final Builder appendUnscaledDecimal(long value) {
-      assert type.typeId == DType.DTypeEnum.DECIMAL64;
-      assert currentIndex < rows;
-      data.setLong(currentIndex * type.getSizeInBytes(), value);
-      currentIndex++;
-      return this;
-    }
-
     public Builder append(String value) {
       assert value != null : "appendNull must be used to append null strings";
       return appendUTF8String(value.getBytes(StandardCharsets.UTF_8));
@@ -1427,40 +1283,6 @@ public Builder appendArray(double... values) {
       return this;
     }
 
-    public Builder appendUnscaledDecimalArray(int... values) {
-      assert type.typeId == DType.DTypeEnum.DECIMAL32;
-      assert (values.length + currentIndex) <= rows;
-      data.setInts(currentIndex * type.getSizeInBytes(), values, 0, values.length);
-      currentIndex += values.length;
-      return this;
-    }
-
-    public Builder appendUnscaledDecimalArray(long... values) {
-      assert type.typeId == DType.DTypeEnum.DECIMAL64;
-      assert (values.length + currentIndex) <= rows;
-      data.setLongs(currentIndex * type.getSizeInBytes(), values, 0, values.length);
-      currentIndex += values.length;
-      return this;
-    }
-
-    /**
-     * Append multiple values.  This is very slow and should really only be used for tests.
-     * @param values the values to append, including nulls.
-     * @return this for chaining.
-     * @throws {@link IndexOutOfBoundsException}
-     */
-    public Builder appendBoxed(BigDecimal... values) throws IndexOutOfBoundsException {
-      assert type.isDecimalType();
-      for (BigDecimal v : values) {
-        if (v == null) {
-          appendNull();
-        } else {
-          append(v);
-        }
-      }
-      return this;
-    }
-
     /**
      * Append multiple values.  This is very slow and should really only be used for tests.
      * @param values the values to append, including nulls.