Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JNI api for cudf::chunked_pack #13278

Merged
merged 9 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ChunkedPack.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

/**
 * JNI interface to cudf::chunked_pack.
 *
 * ChunkedPack has an Iterator-like API with the familiar `hasNext` and `next`
 * methods. `next` should be used in a loop until `hasNext` returns false.
 *
 * However, `ChunkedPack.next` is special because it takes a `DeviceMemoryBuffer` as a
 * parameter, which means that the caller can call `next` giving any bounce buffer it
 * may have previously allocated. No requirement exists that the bounce buffer be the
 * same each time, the only requirement is that their sizes are all the same, and match
 * the size that was passed to `Table.makeChunkedPack` (which instantiates this class).
 *
 * The user of `ChunkedPack` must call `close()` when done using it to clear up both
 * host and device resources. `close()` may be called more than once; only the first
 * call releases the native object. Every other method throws `IllegalStateException`
 * once the instance has been closed.
 */
public class ChunkedPack implements AutoCloseable {
  // Address of the native `cudf::chunked_pack`; reset to 0 by `close()`.
  long nativePtr;

  /**
   * This constructor is invoked by `Table.makeChunkedPack` after creating a native
   * `cudf::chunked_pack`.
   * @param nativePtr pointer to a `cudf::chunked_pack`
   */
  public ChunkedPack(long nativePtr) {
    this.nativePtr = nativePtr;
  }

  /**
   * Get the final contiguous size of the table we are packing. This is
   * the size that the final buffer should be, just like if the user called
   * `cudf::pack` instead.
   * @return the total number of bytes for the table in contiguous layout
   * @throws IllegalStateException if this instance has been closed
   */
  public long getTotalContiguousSize() {
    return chunkedPackGetTotalContiguousSize(openNativePtr());
  }

  /**
   * Method to be called to ensure that `ChunkedPack` has work left.
   * This method should be invoked followed by a call to `next`, until
   * `hasNext` returns false.
   * @return true if there is work left to be done (`next` should be called),
   *         false otherwise.
   * @throws IllegalStateException if this instance has been closed
   */
  public boolean hasNext() {
    return chunkedPackHasNext(openNativePtr());
  }

  /**
   * Place the next contiguous chunk of our table into `userPtr`.
   *
   * This method throws if `hasNext` is false.
   * @param userPtr the bounce buffer to use for this iteration
   * @return the number of bytes that we were able to place in `userPtr`. This is
   *         at most `userPtr.getLength()`.
   * @throws IllegalStateException if this instance has been closed
   */
  public long next(DeviceMemoryBuffer userPtr) {
    return chunkedPackNext(openNativePtr(), userPtr.getAddress(), userPtr.getLength());
  }

  /**
   * Generates opaque table metadata that can be unpacked via `cudf::unpack`
   * at a later time.
   * @return a `PackedColumnMetadata` instance referencing cuDF packed table metadata
   * @throws IllegalStateException if this instance has been closed
   */
  public PackedColumnMetadata buildMetadata() {
    return new PackedColumnMetadata(chunkedPackBuildMetadata(openNativePtr()));
  }

  /**
   * Release the native `cudf::chunked_pack`. Safe to call repeatedly: the pointer is
   * cleared after the first release so later calls never free it a second time.
   */
  @Override
  public void close() {
    if (nativePtr != 0) {
      chunkedPackDelete(nativePtr);
      nativePtr = 0;
    }
  }

  /**
   * Return the native pointer, refusing to hand a cleared (0) pointer to native code.
   */
  private long openNativePtr() {
    if (nativePtr == 0) {
      throw new IllegalStateException("ChunkedPack is closed");
    }
    return nativePtr;
  }

  private static native long chunkedPackGetTotalContiguousSize(long nativePtr);
  private static native boolean chunkedPackHasNext(long nativePtr);
  private static native long chunkedPackNext(long nativePtr, long userPtr, long userPtrSize);
  private static native long chunkedPackBuildMetadata(long nativePtr);
  private static native void chunkedPackDelete(long nativePtr);
}
27 changes: 9 additions & 18 deletions java/src/main/java/ai/rapids/cudf/ContiguousTable.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,11 +25,11 @@
* much simpler.
*/
public final class ContiguousTable implements AutoCloseable {
private long metadataHandle = 0;
private Table table = null;
private DeviceMemoryBuffer buffer;
private ByteBuffer metadataBuffer = null;
private final long rowCount;
private PackedColumnMetadata meta;
private ByteBuffer metadataBuffer;

// This method is invoked by JNI
static ContiguousTable fromPackedTable(long metadataHandle,
Expand All @@ -43,8 +43,8 @@ static ContiguousTable fromPackedTable(long metadataHandle,

/** Construct a contiguous table instance given a table and the device buffer backing it. */
ContiguousTable(Table table, DeviceMemoryBuffer buffer) {
this.metadataHandle = createPackedMetadata(table.getNativeView(),
buffer.getAddress(), buffer.getLength());
this.meta = new PackedColumnMetadata(createPackedMetadata(table.getNativeView(),
buffer.getAddress(), buffer.getLength()));
this.table = table;
this.buffer = buffer;
this.rowCount = table.getRowCount();
Expand All @@ -57,7 +57,7 @@ static ContiguousTable fromPackedTable(long metadataHandle,
* @param rowCount number of rows in the table
*/
ContiguousTable(long metadataHandle, DeviceMemoryBuffer buffer, long rowCount) {
this.metadataHandle = metadataHandle;
this.meta = new PackedColumnMetadata(metadataHandle);
this.buffer = buffer;
this.rowCount = rowCount;
}
Expand Down Expand Up @@ -94,18 +94,14 @@ public DeviceMemoryBuffer getBuffer() {
* or data corruption.
*/
public ByteBuffer getMetadataDirectBuffer() {
if (metadataBuffer == null) {
metadataBuffer = createMetadataDirectBuffer(metadataHandle);
}
return metadataBuffer.asReadOnlyBuffer();
return meta.getMetadataDirectBuffer();
}

/** Close the contiguous table instance and its underlying resources. */
@Override
public void close() {
if (metadataHandle != 0) {
closeMetadata(metadataHandle);
metadataHandle = 0;
if (meta != null) {
meta.close();
}

if (table != null) {
Expand All @@ -122,9 +118,4 @@ public void close() {
// create packed metadata for a table backed by a single data buffer
private static native long createPackedMetadata(long tableView, long dataAddress, long dataSize);

// create a DirectByteBuffer for the packed table metadata
private static native ByteBuffer createMetadataDirectBuffer(long metadataHandle);

// release the native metadata resources for a packed table
private static native void closeMetadata(long metadataHandle);
}
74 changes: 74 additions & 0 deletions java/src/main/java/ai/rapids/cudf/PackedColumnMetadata.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

import java.nio.ByteBuffer;

/**
 * Metadata for a table that is backed by a single contiguous device buffer.
 * <p>
 * Wraps a native metadata handle and lazily exposes it as a direct byte buffer.
 * The instance must be closed to release the native metadata; closing more than
 * once is harmless.
 */
public final class PackedColumnMetadata implements AutoCloseable {
  // Native metadata handle; 0 means there is nothing (left) to release.
  private long metadataHandle = 0;
  // Lazily created direct buffer over the native metadata; dropped on close.
  private ByteBuffer metadataBuffer = null;

  // This method is invoked by JNI
  static PackedColumnMetadata fromPackedColumnMeta(long metadataHandle) {
    return new PackedColumnMetadata(metadataHandle);
  }

  /**
   * Construct the PackedColumnMetadata instance given a metadata handle.
   * @param metadataHandle address of the cudf packed_table host-based metadata instance
   */
  PackedColumnMetadata(long metadataHandle) {
    this.metadataHandle = metadataHandle;
  }

  /**
   * Get the byte buffer containing the host metadata describing the schema and layout of the
   * contiguous table.
   * <p>
   * NOTE: This is a direct byte buffer that is backed by the underlying native metadata instance
   * and therefore is only valid to be used while this PackedColumnMetadata instance is valid.
   * Attempts to cache and access the resulting buffer after this instance has been destroyed
   * will result in undefined behavior including the possibility of segmentation faults
   * or data corruption.
   * @return a read-only view of the metadata buffer
   * @throws IllegalStateException if this instance has been closed or holds no native metadata
   */
  public ByteBuffer getMetadataDirectBuffer() {
    if (metadataBuffer == null) {
      // Never hand a cleared handle to native code.
      if (metadataHandle == 0) {
        throw new IllegalStateException("PackedColumnMetadata is closed");
      }
      metadataBuffer = createMetadataDirectBuffer(metadataHandle);
    }
    return metadataBuffer.asReadOnlyBuffer();
  }

  /** Close the PackedColumnMetadata instance and its underlying resources. */
  @Override
  public void close() {
    // Drop the cached buffer first: it points into the native memory released below, so
    // keeping it would let later calls return a view over freed memory.
    metadataBuffer = null;
    if (metadataHandle != 0) {
      closeMetadata(metadataHandle);
      metadataHandle = 0;
    }
  }

  // create a DirectByteBuffer for the packed metadata
  private static native ByteBuffer createMetadataDirectBuffer(long metadataHandle);

  // release the native metadata resources for a packed table
  private static native void closeMetadata(long metadataHandle);
}
40 changes: 40 additions & 0 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ static Table removeNullMasksIfNeeded(Table table) {

private static native ContiguousTable[] contiguousSplit(long inputTable, int[] indices);

// Native entry point behind the public `makeChunkedPack` overloads. The returned long is
// wrapped in a `ChunkedPack`, which treats it as a pointer to a native `cudf::chunked_pack`.
// The single-argument overload passes 0 for `tempMemoryResource` to request the default
// per-device memory resource.
private static native long makeChunkedPack(long inputTable, long bounceBufferSize, long tempMemoryResource);

private static native long[] partition(long inputTable, long partitionView,
int numberOfPartitions, int[] outputOffsets);

Expand Down Expand Up @@ -2166,6 +2168,44 @@ public ContiguousTable[] contiguousSplit(int... indices) {
return contiguousSplit(nativeHandle, indices);
}

/**
 * Build a `ChunkedPack` that packs this table contiguously in memory, one chunk at a
 * time, through a bounce buffer of `bounceBufferSize` bytes.
 *
 * This overload accepts an `RmmDeviceMemoryResource`, which can be used to pre-allocate
 * all scratch and temporary space required for the state of `cudf::chunked_pack`.
 *
 * Ownership of the returned `ChunkedPack` passes to the caller, who must close it.
 *
 * @param bounceBufferSize The size of bounce buffer that will be utilized to pack into
 * @param tempMemoryResource A memory resource that is used to satisfy allocations for
 *                           temporary and thrust scratch space.
 * @return An instance of `ChunkedPack` that the caller must use to finish the operation.
 */
public ChunkedPack makeChunkedPack(
    long bounceBufferSize, RmmDeviceMemoryResource tempMemoryResource) {
  // Resolve the resource handle before crossing into native code.
  final long resourceHandle = tempMemoryResource.getHandle();
  final long nativeChunkedPack =
      makeChunkedPack(nativeHandle, bounceBufferSize, resourceHandle);
  return new ChunkedPack(nativeChunkedPack);
}

/**
 * Build a `ChunkedPack` that packs this table contiguously in memory, one chunk at a
 * time, through a bounce buffer of `bounceBufferSize` bytes.
 *
 * This overload relies on the default per-device memory resource for the scratch and
 * temporary space required for the state of `cudf::chunked_pack`.
 *
 * Ownership of the returned `ChunkedPack` passes to the caller, who must close it.
 *
 * @param bounceBufferSize The size of bounce buffer that will be utilized to pack into
 * @return An instance of `ChunkedPack` that the caller must use to finish the operation.
 */
public ChunkedPack makeChunkedPack(long bounceBufferSize) {
  // A handle of 0 asks the native side for the default per-device memory resource.
  final long defaultResourceHandle = 0;
  final long nativeChunkedPack =
      makeChunkedPack(nativeHandle, bounceBufferSize, defaultResourceHandle);
  return new ChunkedPack(nativeChunkedPack);
}

/**
* Explodes a list column's elements.
*
Expand Down
2 changes: 2 additions & 0 deletions java/src/main/native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ add_library(
cudfjni
src/Aggregation128UtilsJni.cpp
src/AggregationJni.cpp
src/ChunkedPackJni.cpp
src/ChunkedReaderJni.cpp
src/CudfJni.cpp
src/CudaJni.cpp
Expand All @@ -139,6 +140,7 @@ add_library(
src/NvcompJni.cpp
src/NvtxRangeJni.cpp
src/NvtxUniqueRangeJni.cpp
src/PackedColumnMetadataJni.cpp
src/RmmJni.cpp
src/ScalarJni.cpp
src/TableJni.cpp
Expand Down
75 changes: 75 additions & 0 deletions java/src/main/native/src/ChunkedPackJni.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cudf_jni_apis.hpp"

extern "C" {
/**
 * Native side of `ChunkedPack.chunkedPackDelete`.
 *
 * Interprets `chunked_pack` as the address of a `cudf::chunked_pack` and deletes it.
 * Exceptions raised inside the `try` body are handled by `CATCH_STD`.
 */
JNIEXPORT void JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackDelete(JNIEnv *env, jclass,
                                                                         jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    cudf::chunked_pack *const packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    delete packer;
  }
  CATCH_STD(env, );
}

/**
 * Native side of `ChunkedPack.chunkedPackGetTotalContiguousSize`.
 *
 * Interprets `chunked_pack` as the address of a `cudf::chunked_pack` and returns the value
 * of its `get_total_contiguous_size()`. `CATCH_STD` handles exceptions, with 0 as the
 * fallback value supplied to it.
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackGetTotalContiguousSize(
    JNIEnv *env, jclass, jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    cudf::chunked_pack *const packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    auto const total_size = packer->get_total_contiguous_size();
    return total_size;
  }
  CATCH_STD(env, 0);
}

/**
 * Native side of `ChunkedPack.chunkedPackHasNext`.
 *
 * Interprets `chunked_pack` as the address of a `cudf::chunked_pack` and returns the result
 * of its `has_next()`. `CATCH_STD` handles exceptions, with 0 as the fallback value
 * supplied to it.
 */
JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackHasNext(JNIEnv *env, jclass,
                                                                              jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    cudf::chunked_pack *const packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    auto const more_chunks = packer->has_next();
    return more_chunks;
  }
  CATCH_STD(env, 0);
}

/**
 * Native side of `ChunkedPack.chunkedPackNext`.
 *
 * Wraps the caller-provided address/length pair (`user_ptr`, `user_ptr_size`) in a
 * `cudf::device_span<uint8_t>` and passes it to `next()` on the `cudf::chunked_pack`
 * addressed by `chunked_pack`, returning whatever `next()` returns. `CATCH_STD` handles
 * exceptions, with 0 as the fallback value supplied to it.
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackNext(JNIEnv *env, jclass,
                                                                        jlong chunked_pack,
                                                                        jlong user_ptr,
                                                                        jlong user_ptr_size) {
  try {
    cudf::jni::auto_set_device(env);
    cudf::chunked_pack *const packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    uint8_t *const bounce_data = reinterpret_cast<uint8_t *>(user_ptr);
    std::size_t const bounce_size = static_cast<std::size_t>(user_ptr_size);
    auto bounce_buffer = cudf::device_span<uint8_t>(bounce_data, bounce_size);
    return packer->next(bounce_buffer);
  }
  CATCH_STD(env, 0);
}

/**
 * Native side of `ChunkedPack.chunkedPackBuildMetadata`.
 *
 * Calls `build_metadata()` on the `cudf::chunked_pack` addressed by `chunked_pack` and
 * releases the resulting `std::vector<uint8_t>` from its `unique_ptr`, returning the raw
 * address as a `jlong`. The vector is no longer owned on the native side after this call;
 * the Java caller wraps the address in a `PackedColumnMetadata`.
 * `CATCH_STD` handles exceptions, with 0 as the fallback value supplied to it.
 */
JNIEXPORT jlong JNICALL
Java_ai_rapids_cudf_ChunkedPack_chunkedPackBuildMetadata(JNIEnv *env, jclass, jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    cudf::chunked_pack *const packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    std::unique_ptr<std::vector<uint8_t>> metadata = packer->build_metadata();
    // Hand the raw vector over to the caller; nothing here frees it afterwards.
    std::vector<uint8_t> *const released = metadata.release();
    return reinterpret_cast<jlong>(released);
  }
  CATCH_STD(env, 0);
}

} // extern "C"
Loading