Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Initial work for decimal type in Java/JNI [skip ci] #6514

Merged
merged 32 commits into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
4a4ea1d
Initial changes for supporting decimal type
nartal1 Oct 8, 2020
a549452
Modify constructors and JNI methods to create column.
nartal1 Oct 12, 2020
31f131b
Add inner class NativeDataType
nartal1 Oct 13, 2020
4e5f4e1
Merge branch 'branch-0.17' of https://github.com/rapidsai/cudf into d…
nartal1 Oct 13, 2020
23ef61d
Pass in scale and ordinal instead of saving native data_type handle
nartal1 Oct 14, 2020
17e1d30
Add DataType in HostColumnVector and address review comments
nartal1 Oct 14, 2020
b29c59b
Support DataType in Builder
nartal1 Oct 14, 2020
a7ae605
Merge branch 'branch-0.17' of https://github.com/rapidsai/cudf into d…
nartal1 Oct 15, 2020
9a2e948
addressed few review comments
nartal1 Oct 15, 2020
da9e4d6
addressed review comments
nartal1 Oct 15, 2020
b3d72d1
update PR with new design and addressed review comments
nartal1 Oct 21, 2020
7dba559
addressed few nits
nartal1 Oct 22, 2020
b0108ef
add factory methods for DType
sperlingxx Oct 22, 2020
4278d48
addressed review comments
nartal1 Oct 22, 2020
86b8a6b
Merge pull request #1 from sperlingxx/dtype_class_alfxu
nartal1 Oct 22, 2020
1434312
addressed review comments
nartal1 Oct 22, 2020
1931e38
Merge branch 'dtype_class' of github.com:nartal1/cudf into dtype_class
nartal1 Oct 22, 2020
f59c524
addressed review comments
nartal1 Oct 22, 2020
5220a7d
add decimal support for Scalar
sperlingxx Oct 23, 2020
e4af748
Merge pull request #2 from sperlingxx/dtype_class_alfxu
nartal1 Oct 23, 2020
1d21b47
addressed review comments
nartal1 Oct 24, 2020
3635007
update changelog
nartal1 Oct 24, 2020
9680e44
Merge branch 'branch-0.17' of https://github.com/rapidsai/cudf into d…
nartal1 Oct 24, 2020
fa8af4b
update changelog
nartal1 Oct 24, 2020
5c52d88
updated toString method:
nartal1 Oct 26, 2020
6ad640b
addressed review comments
nartal1 Oct 26, 2020
9b5b4b1
Merge branch 'branch-0.17' of https://github.com/rapidsai/cudf into d…
nartal1 Oct 26, 2020
767b50c
update changelog
nartal1 Oct 26, 2020
6d2bac9
update comments
nartal1 Oct 26, 2020
74c311c
update comments
nartal1 Oct 26, 2020
ef9d557
Merge branch 'branch-0.17' of https://github.com/rapidsai/cudf into d…
nartal1 Oct 27, 2020
f5c6d56
update changelog
nartal1 Oct 27, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 150 additions & 85 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions java/src/main/java/ai/rapids/cudf/DType.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public enum DType {
DURATION_NANOSECONDS(8, 21, "int64"),
//DICTIONARY32(4, 22, "NO IDEA"),

DECIMAL32(4, 25, "decimal32"),
jlowe marked this conversation as resolved.
Show resolved Hide resolved
DECIMAL64(8, 26, "decimal64"),
STRING(0, 23, "str"),
LIST(0, 24, "list"),
STRUCT(0, 27, "struct");
Expand Down
52 changes: 52 additions & 0 deletions java/src/main/java/ai/rapids/cudf/DataType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ai.rapids.cudf;

import java.io.IOException;

public class DataType {

DType typeId;
revans2 marked this conversation as resolved.
Show resolved Hide resolved
int scale;
public DataType(DType id) {typeId = id; }

public DataType(DType id, int fp_scale) {
revans2 marked this conversation as resolved.
Show resolved Hide resolved
typeId =id;
scale = fp_scale;
}
private static native long makeNativeType(int typeId, int scale);

public class NativeDataType implements AutoCloseable {
jlowe marked this conversation as resolved.
Show resolved Hide resolved
long nativeType;
// build native data_type in constructors
public NativeDataType(DType id) {
this.nativeType = DataType.makeNativeType(id.nativeId, 0);
}
public NativeDataType(DType id, int decimalScale) {
this.nativeType = DataType.makeNativeType(id.nativeId, decimalScale);;
}
public long returnNativeId() {
return this.nativeType;
}

public void close() throws IOException {
// call native delete method to clean up native data_type
//deleteNativeType

}
}

}
40 changes: 40 additions & 0 deletions java/src/main/java/ai/rapids/cudf/HostColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ public final class HostColumnVector extends HostColumnVectorCore {
this(type, rows, nullCount, hostDataBuffer, hostValidityBuffer, null);
}

HostColumnVector(ai.rapids.cudf.DataType type, long rows, Optional<Long> nullCount,
jlowe marked this conversation as resolved.
Show resolved Hide resolved
HostMemoryBuffer hostDataBuffer, HostMemoryBuffer hostValidityBuffer) {
this(type, rows, nullCount, hostDataBuffer, hostValidityBuffer, null);
}

/**
* Create a new column vector with data populated on the host.
* @param type the type of the vector
Expand Down Expand Up @@ -89,6 +94,21 @@ public final class HostColumnVector extends HostColumnVectorCore {
incRefCountInternal(true);
}

HostColumnVector(ai.rapids.cudf.DataType type, long rows, Optional<Long> nullCount,
HostMemoryBuffer hostDataBuffer, HostMemoryBuffer hostValidityBuffer,
HostMemoryBuffer offsetBuffer) {
super(type, rows, nullCount, hostDataBuffer, hostValidityBuffer, offsetBuffer, new ArrayList<>());
assert type.typeId != DType.LIST : "This constructor should not be used for list type";
if (nullCount.isPresent() && nullCount.get() > 0 && hostValidityBuffer == null) {
throw new IllegalStateException("Buffer cannot have a nullCount without a validity buffer");
}
if (type.typeId != DType.STRING && type.typeId != DType.LIST) {
assert offsetBuffer == null : "offsets are only supported for STRING and LIST";
}
refCount = 0;
incRefCountInternal(true);
}

/**
* This is a really ugly API, but it is possible that the lifecycle of a column of
* data may not have a clear lifecycle thanks to java and GC. This API informs the leak
Expand Down Expand Up @@ -384,6 +404,26 @@ public static HostColumnVector fromLongs(long... values) {
return build(DType.INT64, values.length, (b) -> b.appendArray(values));
}


/* public static HostColumnVector fromDecimal32(int... values) {
return build(DType.DECIMAL32, values.length, (b) -> b.appendArray(values));
}

public static HostColumnVector fromDecimal64(long... values) {
return build(DType.DECIMAL64, values.length, (b) -> b.appendArray(values));
}


public static HostColumnVector fromBoxedDecimals32(Integer... values) {
return build(DType.DECIMAL32, values.length, (b) -> b.appendBoxed(values));
}

public static HostColumnVector fromBoxedDecimals64(Long... values) {
return build(DType.DECIMAL64, values.length, (b) -> b.appendBoxed(values));
}*/



/**
* Create a new vector from the given values.
* <p>
Expand Down
14 changes: 14 additions & 0 deletions java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ public class HostColumnVectorCore implements ColumnViewAccess<HostMemoryBuffer>

protected final OffHeapState offHeap;
protected final DType type;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we contain DataType instead of DType + scale in here? And we need implement getDecimal. I think we can change it in subsequent PR.

protected long scale;
revans2 marked this conversation as resolved.
Show resolved Hide resolved
protected long rows;
protected Optional<Long> nullCount;
protected List<HostColumnVectorCore> children;


public HostColumnVectorCore(DType type, long rows,
Optional<Long> nullCount, HostMemoryBuffer data, HostMemoryBuffer validity,
HostMemoryBuffer offsets, List<HostColumnVectorCore> nestedChildren) {
Expand All @@ -53,6 +55,18 @@ public HostColumnVectorCore(DType type, long rows,
this.children = nestedChildren;
}

public HostColumnVectorCore(DataType type, long rows,
Optional<Long> nullCount, HostMemoryBuffer data, HostMemoryBuffer validity,
HostMemoryBuffer offsets, List<HostColumnVectorCore> nestedChildren) {
this.offHeap = new OffHeapState(data, validity, offsets);
MemoryCleaner.register(this, offHeap);
this.type = type.typeId;
this.scale = type.scale;
this.rows = rows;
this.nullCount = nullCount;
this.children = nestedChildren;
}

/**
* Returns the type of this vector.
*/
Expand Down
33 changes: 23 additions & 10 deletions java/src/main/native/src/ColumnVectorJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,15 +1283,15 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_hash(JNIEnv *env,
////////

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeCudfColumnView(
JNIEnv *env, jobject j_object, jint j_type, jlong j_data, jlong j_data_size, jlong j_offset,
JNIEnv *env, jobject j_object, jint j_type, jint scale, jlong j_data, jlong j_data_size, jlong j_offset,
jlowe marked this conversation as resolved.
Show resolved Hide resolved
jlong j_valid, jint j_null_count, jint size, jlongArray j_children) {

JNI_ARG_CHECK(env, (size != 0), "size is 0", 0);
try {
using cudf::column_view;
cudf::jni::auto_set_device(env);
cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
cudf::data_type n_data_type(n_type);
cudf::data_type n_data_type(n_type, scale);

std::unique_ptr<cudf::column_view> ret;
void *data = reinterpret_cast<void *>(j_data);
Expand All @@ -1309,25 +1309,25 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeCudfColumnView(
// data is the second child

cudf::size_type *offsets = reinterpret_cast<cudf::size_type *>(j_offset);
cudf::column_view offsets_column(cudf::data_type{cudf::type_id::INT32}, size + 1, offsets);
cudf::column_view data_column(cudf::data_type{cudf::type_id::INT8}, j_data_size, data);
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::STRING}, size, nullptr, valid,
cudf::column_view offsets_column(cudf::data_type{cudf::type_id::INT32, scale}, size + 1, offsets);
cudf::column_view data_column(cudf::data_type{cudf::type_id::INT8, scale}, j_data_size, data);
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::STRING, scale}, size, nullptr, valid,
j_null_count, 0, {offsets_column, data_column}));
} else if (n_type == cudf::type_id::LIST) {
JNI_NULL_CHECK(env, j_offset, "offset is null", 0);
cudf::jni::native_jpointerArray<cudf::column_view> children(env, j_children);
JNI_ARG_CHECK(env, (children.size() != 0), "LIST children size is 0", 0);
cudf::size_type *offsets = reinterpret_cast<cudf::size_type *>(j_offset);
cudf::column_view offsets_column(cudf::data_type{cudf::type_id::INT32}, size + 1, offsets);
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::LIST}, size, nullptr, valid,
cudf::column_view offsets_column(cudf::data_type{cudf::type_id::INT32, scale}, size + 1, offsets);
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::LIST, scale}, size, nullptr, valid,
j_null_count, 0, {offsets_column, *children[0]}));
} else if (n_type == cudf::type_id::STRUCT) {
cudf::jni::native_jpointerArray<cudf::column_view> children(env, j_children);
std::vector<column_view> children_vector(children.size());
for (int i = 0; i < children.size(); i++) {
children_vector[i] = *children[i];
}
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::STRUCT}, size, nullptr, valid,
ret.reset(new cudf::column_view(cudf::data_type{cudf::type_id::STRUCT, scale}, size, nullptr, valid,
j_null_count, 0, children_vector));
} else {
ret.reset(new cudf::column_view(n_data_type, size, data, valid, j_null_count));
Expand All @@ -1350,6 +1350,18 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_ColumnVector_getNativeTypeId(JNIEnv *
CATCH_STD(env, 0);
}

JNIEXPORT jint JNICALL Java_ai_rapids_cudf_ColumnVector_getNativeTypeScale(JNIEnv *env,
jobject j_object,
jlowe marked this conversation as resolved.
Show resolved Hide resolved
jlong handle) {
JNI_NULL_CHECK(env, handle, "native handle is null", 0);
try {
cudf::jni::auto_set_device(env);
cudf::column_view *column = reinterpret_cast<cudf::column_view *>(handle);
return static_cast<jint>(column->type().scale());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is type checking needed before calling scale() on type_id? will it blow up in case its not a decimal type?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not need the cast and I would prefer to not have it because it shows that we don't have issues mapping from the native scale to the JNI representation.

}
CATCH_STD(env, 0);
}

JNIEXPORT jint JNICALL Java_ai_rapids_cudf_ColumnVector_getNativeRowCount(JNIEnv *env,
jobject j_object,
jlowe marked this conversation as resolved.
Show resolved Hide resolved
jlong handle) {
Expand Down Expand Up @@ -1570,12 +1582,13 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_getNativeColumnView(JNI

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeEmptyCudfColumn(JNIEnv *env,
jobject j_object,
jlowe marked this conversation as resolved.
Show resolved Hide resolved
jint j_type) {
jint j_type,
jint scale) {

try {
cudf::jni::auto_set_device(env);
cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
cudf::data_type n_data_type(n_type);
cudf::data_type n_data_type(n_type, scale);
std::unique_ptr<cudf::column> column(cudf::make_empty_column(n_data_type));
return reinterpret_cast<jlong>(column.release());
}
Expand Down
9 changes: 8 additions & 1 deletion java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,9 @@ void testSequenceOtherTypes() {
@Test
void testFromScalarZeroRows() {
for (DType type : DType.values()) {
if (type == DType.DECIMAL32 || type == DType.DECIMAL64) {
continue;
revans2 marked this conversation as resolved.
Show resolved Hide resolved
}
Scalar s = null;
try {
switch (type) {
Expand Down Expand Up @@ -794,6 +797,9 @@ void testGetNativeView() {
void testFromScalar() {
final int rowCount = 4;
for (DType type : DType.values()) {
if(type == DType.DECIMAL32 || type == DType.DECIMAL64) {
continue;
}
Scalar s = null;
ColumnVector expected = null;
ColumnVector result = null;
Expand Down Expand Up @@ -957,7 +963,8 @@ void testFromScalar() {
void testFromScalarNull() {
final int rowCount = 4;
for (DType type : DType.values()) {
if (type == DType.EMPTY || type == DType.LIST || type == DType.STRUCT) {
if (type == DType.EMPTY || type == DType.LIST || type == DType.STRUCT
|| type == DType.DECIMAL32 || type == DType.DECIMAL64) {
continue;
}
try (Scalar s = Scalar.fromNull(type);
Expand Down
3 changes: 3 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ScalarTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public void testIncRef() {
@Test
public void testNull() {
for (DType type : DType.values()) {
if(type == DType.DECIMAL32 || type == DType.DECIMAL64) {
continue;
}
if (!type.isNestedType()) {
try (Scalar s = Scalar.fromNull(type)) {
assertEquals(type, s.getType());
Expand Down