Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds in JNI support for creating a list column from existing columns [skip ci] #7112

Merged
merged 2 commits into from
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
package ai.rapids.cudf;

import ai.rapids.cudf.HostColumnVector.Builder;
import ai.rapids.cudf.WindowOptions.FrameType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -350,6 +349,53 @@ public static ColumnVector makeStruct(long rows, ColumnView... columns) {
}
}

/**
 * Build a LIST column out of existing columns. Every list in the result contains exactly
 * one entry per input column, taken in the order the columns were supplied. There are
 * limits on the maximum output size supported for column lists, so take care with the
 * number of rows in the inputs.
 * @param columns the columns whose values populate the lists, in the order the values
 *                should appear inside each resulting list. At least one is required.
 * @return the new LIST ColumnVector
 * @throws IllegalArgumentException if no columns are supplied.
 */
public static ColumnVector makeList(ColumnView... columns) {
  if (columns.length == 0) {
    throw new IllegalArgumentException("At least one column is needed to get the row count");
  }
  // The first column supplies the row count and element type that every other column
  // is validated against by the three-argument overload.
  final ColumnView first = columns[0];
  return makeList(first.getRowCount(), first.getType(), columns);
}

/**
 * Create a LIST column from the given columns. Each list in the returned column will have the
 * same number of entries in it as columns passed into this method. Be careful about the
 * number of rows passed in as there are limits on the maximum output size supported for
 * column lists.
 * @param rows the number of rows to create, for the special case of an empty list.
 * @param type the type of the child column, for the special case of an empty list.
 * @param columns the columns to make up the list column, in the order they will appear in the
 *                resulting lists.
 * @return the new LIST ColumnVector
 * @throws IllegalArgumentException if rows is negative or too large for the 32-bit offsets
 *         used by the native implementation, if the combined number of entries would not fit
 *         in a single child column, if any column differs from rows or type, or if no columns
 *         are given and type is a nested type.
 */
public static ColumnVector makeList(long rows, DType type, ColumnView... columns) {
  // The native code builds an INT32 offsets column holding rows + 1 entries, so rows must
  // leave room for that extra entry. Without this check the 64-bit value would be silently
  // narrowed on the native side.
  if (rows < 0 || rows >= Integer.MAX_VALUE) {
    throw new IllegalArgumentException("The number of rows must be between 0 and "
        + (Integer.MAX_VALUE - 1) + " but was " + rows);
  }
  // The last offset is rows * columns.length and must itself fit in an INT32. Both factors
  // are below 2^31 here, so the product cannot overflow a long.
  if (rows * columns.length > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("The resulting list column would hold "
        + (rows * columns.length) + " entries, which is more than the supported maximum of "
        + Integer.MAX_VALUE);
  }
  long[] handles = new long[columns.length];
  for (int i = 0; i < columns.length; i++) {
    ColumnView cv = columns[i];
    if (rows != cv.getRowCount()) {
      throw new IllegalArgumentException("All columns must have the same number of rows");
    }
    if (!type.equals(cv.getType())) {
      throw new IllegalArgumentException("All columns must have the same type");
    }

    handles[i] = cv.getNativeView();
  }
  // With no input columns the native side has to create the empty child column itself from
  // the type id and scale alone; that path is rejected here for nested types.
  if (columns.length == 0 && type.isNestedType()) {
    throw new IllegalArgumentException(
        "Creating an empty list column of nested types is not currently supported");
  }
  return new ColumnVector(makeList(handles, type.typeId.nativeId, type.getScale(), rows));
}

/**
* Create a new vector of length rows, starting at the initialValue and going by step each time.
* Only numeric types are supported.
Expand Down Expand Up @@ -571,6 +617,9 @@ public ColumnVector castTo(DType type) {

private static native long fromScalar(long scalarHandle, int rowCount) throws CudfException;

/**
 * Native implementation backing the public makeList methods.
 * @param handles native view handles of the columns to combine, in list order. May be empty.
 * @param typeHandle despite the name, the only caller in this class passes the native type id
 *                   of the child type (type.typeId.nativeId), not a pointer-style handle.
 * @param scale the scale of the child type, passed alongside the type id.
 * @param rows the number of rows in the resulting list column.
 * @return a value that the caller hands directly to the ColumnVector constructor
 *         (presumably a native column handle - confirm against the JNI implementation).
 */
private static native long makeList(long[] handles, long typeHandle, int scale, long rows)
throws CudfException;

private static native long concatenate(long[] viewHandles) throws CudfException;

/**
Expand Down
47 changes: 46 additions & 1 deletion java/src/main/native/src/ColumnVectorJni.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,9 +18,11 @@
#include <cudf/concatenate.hpp>
#include <cudf/filling.hpp>
#include <cudf/hashing.hpp>
#include <cudf/reshape.hpp>
jlowe marked this conversation as resolved.
Show resolved Hide resolved
#include <cudf/utilities/bit.hpp>
#include <cudf/lists/detail/concatenate.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/structs/structs_column_view.hpp>

#include "cudf_jni_apis.hpp"
Expand Down Expand Up @@ -48,6 +50,49 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_sequence(JNIEnv *env, j
CATCH_STD(env, 0);
}

/**
 * @brief JNI implementation of the native `ColumnVector.makeList` method.
 *
 * Builds a LIST column with `row_count` rows. When input columns are supplied they are
 * interleaved into a single child column and paired with offsets `0, n, 2n, ...`, where `n`
 * is the number of input columns. When no columns are supplied, every offset is 0 and the
 * child is an empty column of the requested type.
 *
 * @param env JNI environment, used by the null-check and exception-translation macros.
 * @param j_object unused.
 * @param handles array of native `cudf::column_view` pointers, one per input column. May be
 *                empty but must not be null.
 * @param j_type native type id of the child column; only read when `handles` is empty.
 * @param scale scale combined with `j_type` to build the child data type; only read when
 *              `handles` is empty.
 * @param row_count number of rows in the resulting list column.
 *                  NOTE(review): this 64-bit value and `row_count + 1` are passed straight to
 *                  cudf calls; callers are presumably expected to keep it within the range of
 *                  `cudf::size_type` - confirm.
 * @return the released `cudf::column` pointer reinterpreted as a `jlong`. The error-handling
 *         macros are given 0 as their return value.
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, jobject j_object,
                                                                  jlongArray handles,
                                                                  jlong j_type,
                                                                  jint scale,
                                                                  jlong row_count) {
  using ScalarType = cudf::scalar_type_t<cudf::size_type>;
  JNI_NULL_CHECK(env, handles, "native view handles are null", 0)
  try {
    cudf::jni::auto_set_device(env);
    std::unique_ptr<cudf::column> ret;
    cudf::jni::native_jpointerArray<cudf::column_view> children(env, handles);
    // Copy the views out of the JNI array so they can be wrapped in a cudf::table_view.
    std::vector<cudf::column_view> children_vector(children.size());
    for (int i = 0; i < children.size(); i++) {
      children_vector[i] = *children[i];
    }
    // A valid INT32 scalar holding 0: the first offset in both branches below.
    auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
    zero->set_valid(true);
    static_cast<ScalarType *>(zero.get())->set_value(0);

    if (children.size() == 0) {
      // special case because cudf::interleave_columns does not support no columns
      // All row_count + 1 offsets are 0, so every list is empty.
      auto offsets = cudf::make_column_from_scalar(*zero, row_count + 1);
      cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
      auto empty_col = cudf::make_empty_column(n_data_type);
      ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(empty_col),
                                    0, rmm::device_buffer());
    } else {
      // The step between consecutive offsets is the number of input columns, because each
      // output list receives exactly one element from every input column.
      auto count = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
      count->set_valid(true);
      static_cast<ScalarType *>(count.get())->set_value(children.size());

      std::unique_ptr<cudf::column> offsets = cudf::sequence(row_count + 1, *zero, *count);
      auto data_col = cudf::interleave_columns(cudf::table_view(children_vector));
      ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(data_col),
                                    0, rmm::device_buffer());
    }

    // Ownership of the column passes to the Java side through the returned handle.
    return reinterpret_cast<jlong>(ret.release());
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromScalar(JNIEnv *env, jclass,
jlong j_scalar,
jint row_count) {
Expand Down
36 changes: 36 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3830,4 +3830,40 @@ void testMakeStruct() {
assertColumnsAreEqual(expected, created);
}
}

@Test
void testMakeListEmpty() {
  // With no input columns, makeList should yield numRows rows that are each an empty list
  // of the requested child type.
  final int numRows = 10;
  final ListType listOfStrings = new ListType(false, new BasicType(false, DType.STRING));
  try (ColumnVector wanted = ColumnVector.fromLists(listOfStrings,
           Arrays.asList(), Arrays.asList(), Arrays.asList(), Arrays.asList(), Arrays.asList(),
           Arrays.asList(), Arrays.asList(), Arrays.asList(), Arrays.asList(), Arrays.asList());
       ColumnVector actual = ColumnVector.makeList(numRows, DType.STRING)) {
    assertColumnsAreEqual(wanted, actual);
  }
}

@Test
void testMakeList() {
  // Three two-row INT32 columns should produce two lists of three entries each, where list i
  // holds row i of every column in argument order.
  final ListType listOfInts = new ListType(false, new BasicType(false, DType.INT32));
  try (ColumnVector wanted = ColumnVector.fromLists(listOfInts,
           Arrays.asList(1, 3, 5),
           Arrays.asList(2, 4, 6));
       ColumnVector first = ColumnVector.fromInts(1, 2);
       ColumnVector second = ColumnVector.fromInts(3, 4);
       ColumnVector third = ColumnVector.fromInts(5, 6);
       ColumnVector actual = ColumnVector.makeList(first, second, third)) {
    assertColumnsAreEqual(wanted, actual);
  }
}
}