From 10afaede961c1d15c29fed7977d129ac0e25afe9 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 11 Jan 2021 11:44:55 -0600 Subject: [PATCH 1/2] Adds in JNI support for creating an list column from existing columns. --- .../java/ai/rapids/cudf/ColumnVector.java | 51 ++++++++++++++++++- java/src/main/native/src/ColumnVectorJni.cpp | 45 ++++++++++++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 36 +++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index e698d497b2b..88c024a437b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -19,7 +19,6 @@ package ai.rapids.cudf; import ai.rapids.cudf.HostColumnVector.Builder; -import ai.rapids.cudf.WindowOptions.FrameType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -350,6 +349,53 @@ public static ColumnVector makeStruct(long rows, ColumnView... columns) { } } + /** + * Create a LIST column from the given columns. Each list in the returned column will have the + * same number of entries in it as columns passed into this method. The row count and child + * type are taken from the first column. Be careful about the number of rows passed in as there + * are limits on the maximum output size supported for column lists. + * @param columns the columns that make up the lists, in the order they will appear in each list. + * @return the new LIST ColumnVector + * @throws IllegalArgumentException if no columns are given or their row counts or types differ + */ + public static ColumnVector makeList(ColumnView... columns) { + if (columns.length <= 0) { + throw new IllegalArgumentException("At least one column is needed to get the row count"); + } + return makeList(columns[0].getRowCount(), columns[0].getType(), columns); + } + + /** + * Create a LIST column from the given columns. Each list in the returned column will have the + * same number of entries in it as columns passed into this method. 
Be careful about the + * number of rows passed in as there are limits on the maximum output size supported for + * column lists. When no columns are passed in, every one of the rows lists is empty. + * @param rows the number of rows to create; every column passed in must have this many rows. + * @param type the type of the child column; every column passed in must have this type. + * @param columns the columns that make up the lists, in list order; may be empty (see above). + * @return the new LIST ColumnVector + * @throws IllegalArgumentException on row count/type mismatch, or no columns with a nested type + */ + public static ColumnVector makeList(long rows, DType type, ColumnView... columns) { + long[] handles = new long[columns.length]; + for (int i = 0; i < columns.length; i++) { + ColumnView cv = columns[i]; + if (rows != cv.getRowCount()) { + throw new IllegalArgumentException("All columns must have the same number of rows"); + } + if (!type.equals(cv.getType())) { + throw new IllegalArgumentException("All columns must have the same type"); + } + + handles[i] = cv.getNativeView(); + } + if (columns.length == 0 && type.isNestedType()) { + throw new IllegalArgumentException( + "Creating an empty list column of nested types is not currently supported"); + } + return new ColumnVector(makeList(handles, type.typeId.nativeId, type.getScale(), rows)); + } + /** * Create a new vector of length rows, starting at the initialValue and going by step each time. * Only numeric types are supported. 
@@ -571,6 +617,9 @@ public ColumnVector castTo(DType type) { private static native long fromScalar(long scalarHandle, int rowCount) throws CudfException; + private static native long makeList(long[] handles, long typeHandle, int scale, long rows) + throws CudfException; + private static native long concatenate(long[] viewHandles) throws CudfException; /** diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 8aadb4011e1..360af23a138 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -18,9 +18,11 @@ #include #include #include +#include #include #include #include +#include #include #include "cudf_jni_apis.hpp" @@ -48,6 +50,49 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_sequence(JNIEnv *env, j CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, jclass, + jlongArray handles, + jlong j_type, + jint scale, + jlong row_count) { + using ScalarType = cudf::scalar_type_t<cudf::size_type>; + JNI_NULL_CHECK(env, handles, "native view handles are null", 0) + try { + cudf::jni::auto_set_device(env); + std::unique_ptr<cudf::column> ret; + cudf::jni::native_jpointerArray<cudf::column_view> children(env, handles); + std::vector<cudf::column_view> children_vector(children.size()); + for (int i = 0; i < children.size(); i++) { + children_vector[i] = *children[i]; + } + auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32)); + zero->set_valid(true); + static_cast<ScalarType *>(zero.get())->set_value(0); + + if (children.size() == 0) { + // special case because cudf::interleave_columns does not support an empty set of columns + auto offsets = cudf::make_column_from_scalar(*zero, row_count + 1); + cudf::type_id n_type = static_cast<cudf::type_id>(j_type); + cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale); + auto empty_col = cudf::make_empty_column(n_data_type); + ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(empty_col), + 0, 
rmm::device_buffer()); + } else { + auto count = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32)); + count->set_valid(true); + static_cast<ScalarType *>(count.get())->set_value(children.size()); + + std::unique_ptr<cudf::column> offsets = cudf::sequence(row_count + 1, *zero, *count); + auto data_col = cudf::interleave_columns(cudf::table_view(children_vector)); + ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(data_col), + 0, rmm::device_buffer()); + } + + return reinterpret_cast<jlong>(ret.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromScalar(JNIEnv *env, jclass, jlong j_scalar, jint row_count) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 738bacfe130..88ff50959f7 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3830,4 +3830,40 @@ void testMakeStruct() { assertColumnsAreEqual(expected, created); } } + + @Test + void testMakeListEmpty() { + final int numRows = 10; + try (ColumnVector expected = + ColumnVector.fromLists( + new ListType(false, new BasicType(false, DType.STRING)), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList(), + Arrays.asList()); + ColumnVector created = ColumnVector.makeList(numRows, DType.STRING)) { + assertColumnsAreEqual(expected, created); + } + } + + @Test + void testMakeList() { + try (ColumnVector expected = + ColumnVector.fromLists( + new ListType(false, new BasicType(false, DType.INT32)), + Arrays.asList(1, 3, 5), + Arrays.asList(2, 4, 6)); + ColumnVector child1 = ColumnVector.fromInts(1, 2); + ColumnVector child2 = ColumnVector.fromInts(3, 4); + ColumnVector child3 = ColumnVector.fromInts(5, 6); + ColumnVector created = ColumnVector.makeList(child1, child2, child3)) { + 
assertColumnsAreEqual(expected, created); + } + } } From d84e8c32318dc5417274a80f159304efc32aff0e Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 11 Jan 2021 12:30:31 -0600 Subject: [PATCH 2/2] Updated copyright --- java/src/main/native/src/ColumnVectorJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 360af23a138..3bce4912fa4 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.