Skip to content

Commit

Permalink
Some improvements to parse_decimal function and bindings for `is_fi…
Browse files Browse the repository at this point in the history
…xed_point` (#9658)

This PR adds Java bindings for `is_fixed_point`

Authors:
  - Raza Jafri (https://github.com/razajafri)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Robert (Bobby) Evans (https://github.com/revans2)
  - David Wendt (https://github.com/davidwendt)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #9658
  • Loading branch information
razajafri authored Nov 16, 2021
1 parent 7fc65d8 commit 7e4a985
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 25 deletions.
8 changes: 3 additions & 5 deletions cpp/include/cudf/strings/convert/convert_fixed_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,16 @@ std::unique_ptr<column> from_fixed_point(
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to fixed-point.
*
* The output row entry is set to `true` if the corresponding string element
* has at least one character in [+-0123456789.]. The optional sign character
* must only be in the first position. The decimal point may only appear once.
* The sign and the exponent are optional. The decimal point may only appear once.
* Also, the integer component must fit within the size limits of the
* underlying fixed-point storage type. The value of the integer component
* is based on the scale of the `decimal_type` provided.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', '1.2.3', '+17E30', '12.34' '.789', '-0.005]
* s = ['123', '-456', '', '1.2.3', '+17E30', '12.34', '.789', '-0.005']
* b = is_fixed_point(s)
* b is [true, true, false, false, false, true, true, true]
* b is [true, true, false, false, true, true, true, true]
* @endcode
*
* Any null entries result in corresponding null entries in the output column.
Expand Down
32 changes: 32 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,34 @@ public final ColumnVector isNull() {
return new ColumnVector(isNullNative(getNativeView()));
}

/**
 * Checks every string in this column for validity as a fixed-point (decimal) value.
 *
 * The result is a Boolean vector with the same number of rows as this instance: TRUE when
 * the entry is a fixed-point value, FALSE when it is not, and null when the entry is null.
 *
 * The sign and the exponent are optional. The decimal point may only appear once.
 * The integer component must fit within the size limits of the underlying fixed-point
 * storage type. The value of the integer component is based on the scale of the target
 * decimalType.
 *
 * Example:
 * vec = ["A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2.1474", "112.383", "-2.14748",
 * "NULL", "null", null, "1.2", "1.2e-4", "0.00012"]
 * vec.isFixedPoint() = [false, false, false, false, false, false, true, true, true, false, false,
 * null, true, true, true]
 *
 * @param decimalType the data type that should be used for bounds checking. Note that only
 * Decimal types (fixed-point) are allowed.
 * @return Boolean vector
 */
public final ColumnVector isFixedPoint(DType decimalType) {
  // Preconditions are only enforced when JVM assertions are enabled.
  assert type.equals(DType.STRING);
  assert decimalType.isDecimalType();

  // Gather the native call arguments in the same order the call consumes them.
  final long viewHandle = getNativeView();
  final int nativeTypeId = decimalType.getTypeId().getNativeId();
  final int scale = decimalType.getScale();

  final long resultHandle = isFixedPoint(viewHandle, nativeTypeId, scale);
  return new ColumnVector(resultHandle);
}


/**
* Returns a Boolean vector with the same number of rows as this instance, that has
* TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned
Expand Down Expand Up @@ -375,6 +403,7 @@ public final ColumnVector isInteger() {
*/
public final ColumnVector isInteger(DType intType) {
  // Preconditions are only enforced when JVM assertions are enabled.
  assert type.equals(DType.STRING);
  assert intType.isBackedByInt() || intType.isBackedByLong();

  // Gather the native call arguments in the same order the call consumes them.
  final long viewHandle = getNativeView();
  final int nativeTypeId = intType.getTypeId().getNativeId();
  final int scale = intType.getScale();

  final long resultHandle = isIntegerWithType(viewHandle, nativeTypeId, scale);
  return new ColumnVector(resultHandle);
}
Expand Down Expand Up @@ -3220,6 +3249,9 @@ static DeviceMemoryBufferView getOffsetsBuffer(long viewHandle) {
*/
private static native long stringTimestampToTimestamp(long viewHandle, int unit, String format);


/**
 * Native method backing {@link #isFixedPoint(DType)}.
 *
 * @param viewHandle native handle of the column view to check.
 * @param nativeTypeId native id of the decimal type used for the check.
 * @param scale scale of the decimal type used for the check.
 * @return native handle of the resulting column.
 */
private static native long isFixedPoint(long viewHandle, int nativeTypeId, int scale);

/**
* Native method to concatenate a list column of strings (each row is a list of strings),
* concatenates the strings within each row and returns a single strings column result.
Expand Down
16 changes: 16 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2023,6 +2023,22 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isInteger(JNIEnv *env, jo
CATCH_STD(env, 0)
}

// JNI entry point for ColumnView.isFixedPoint: runs cudf::strings::is_fixed_point on the
// column view behind `handle`, using the data type built from `j_dtype` and `scale`, and
// hands ownership of the resulting column back to the caller as a jlong.
// NOTE(review): JNI_NULL_CHECK and CATCH_STD are project macros; presumably they raise a
// Java exception and return the supplied 0 - confirm against their definitions.
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isFixedPoint(JNIEnv *env, jobject,
                                                                    jlong handle, jint j_dtype,
                                                                    jint scale) {

  JNI_NULL_CHECK(env, handle, "native view handle is null", 0)

  try {
    cudf::jni::auto_set_device(env);
    cudf::column_view *input = reinterpret_cast<cudf::column_view *>(handle);
    auto checked =
        cudf::strings::is_fixed_point(*input, cudf::jni::make_data_type(j_dtype, scale));
    // release() gives up ownership; the returned pointer is passed back as the handle.
    return reinterpret_cast<jlong>(checked.release());
  }
  CATCH_STD(env, 0)
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isIntegerWithType(JNIEnv *env, jobject,
jlong handle, jint j_dtype,
jint scale) {
Expand Down
38 changes: 18 additions & 20 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@

package ai.rapids.cudf;

import ai.rapids.cudf.HostColumnVector.BasicType;
import ai.rapids.cudf.HostColumnVector.DataType;
import ai.rapids.cudf.HostColumnVector.ListType;
import ai.rapids.cudf.HostColumnVector.StructData;
import ai.rapids.cudf.HostColumnVector.StructType;

import ai.rapids.cudf.HostColumnVector.*;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

Expand All @@ -38,20 +33,9 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static ai.rapids.cudf.QuantileMethod.HIGHER;
import static ai.rapids.cudf.QuantileMethod.LINEAR;
import static ai.rapids.cudf.QuantileMethod.LOWER;
import static ai.rapids.cudf.QuantileMethod.MIDPOINT;
import static ai.rapids.cudf.QuantileMethod.NEAREST;
import static ai.rapids.cudf.TableTest.assertColumnsAreEqual;
import static ai.rapids.cudf.TableTest.assertStructColumnsAreEqual;
import static ai.rapids.cudf.TableTest.assertTablesAreEqual;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static ai.rapids.cudf.QuantileMethod.*;
import static ai.rapids.cudf.TableTest.*;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

public class ColumnVectorTest extends CudfTestBase {
Expand Down Expand Up @@ -4834,6 +4818,20 @@ void testIsInteger() {
}
}

@Test
void testIsFixedPoint() {
  // Inputs mix non-numeric text, plain decimals, an exponent form and a null entry.
  final String[] inputs = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity",
      "2.1474", "112.383", "-2.14748", "NULL", "null", null, "1.2", "1.2e-4", "0.00012"};
  // One expected flag per input, in the same order; the null input maps to null.
  final Boolean[] expectedFlags = {false, false, false, false, false, false,
      true, true, true, false, false, null, true, true, true};

  final DType decimalType = DType.create(DType.DTypeEnum.DECIMAL32, -3);
  try (ColumnVector strings = ColumnVector.fromStrings(inputs);
       ColumnVector actual = strings.isFixedPoint(decimalType);
       ColumnVector expected = ColumnVector.fromBoxedBooleans(expectedFlags)) {
    assertColumnsAreEqual(expected, actual);
  }
}

@Test
void testIsFloat() {
String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0",
Expand Down

0 comments on commit 7e4a985

Please sign in to comment.