-
Notifications
You must be signed in to change notification settings - Fork 919
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
JNI api for cudf::chunked_pack (#13278)
This PR is the standalone JNI side of #13260. Therefore it doesn't build as is, but I am putting this up as a draft for the Java reviewers to start taking a look. This implements a `ChunkedPack` Java class mirroring the interface of `cudf::chunked_pack`. This is an iterator-like class that can be used to invoke `cudf::pack` (aka `cudf::contiguous_split` without splits) over several iterations against a bounce buffer. In order to create a `ChunkedPack`, the user calls `makeChunkedPack` from a `Table` instance. During this call the user can also pass an `RmmDeviceMemoryResource` to be used internally by `cudf::chunked_pack` exclusively for scratch/temporary data. Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Nghia Truong (https://github.com/ttnghia) URL: #13278
- Loading branch information
Showing
10 changed files
with
437 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
package ai.rapids.cudf; | ||
|
||
/** | ||
* JNI interface to cudf::chunked_pack. | ||
* | ||
* ChunkedPack has an Iterator-like API with the familiar `hasNext` and `next` | ||
* methods. `next` should be used in a loop until `hasNext` returns false. | ||
* | ||
* However, `ChunkedPack.next` is special because it takes a `DeviceMemoryBuffer` as a | ||
* parameter, which means that the caller can call `next` giving any bounce buffer it | ||
* may have previously allocated. No requirement exists that the bounce buffer be the | ||
* same each time, the only requirement is that their sizes are all the same, and match | ||
* the size that was passed to `Table.makeChunkedPack` (which instantiates this class). | ||
* | ||
* The user of `ChunkedPack` must close `.close()` when done using it to clear up both | ||
* host and device resources. | ||
*/ | ||
public class ChunkedPack implements AutoCloseable { | ||
long nativePtr; | ||
|
||
/** | ||
* This constructor is invoked by `Table.makeChunkedPack` after creating a native | ||
* `cudf::chunked_pack`. | ||
* @param nativePtr pointer to a `cudf::chunked_pack` | ||
*/ | ||
public ChunkedPack(long nativePtr) { | ||
this.nativePtr = nativePtr; | ||
} | ||
|
||
/** | ||
* Get the final contiguous size of the table we are packing. This is | ||
* the size that the final buffer should be, just like if the user called | ||
* `cudf::pack` instead. | ||
* @return the total number of bytes for the table in contiguous layout | ||
*/ | ||
public long getTotalContiguousSize() { | ||
return chunkedPackGetTotalContiguousSize(nativePtr); | ||
} | ||
|
||
/** | ||
* Method to be called to ensure that `ChunkedPack` has work left. | ||
* This method should be invoked followed by a call to `next`, until | ||
* `hasNext` returns false. | ||
* @return true if there is work left to be done (`next` should be called), | ||
* false otherwise. | ||
*/ | ||
public boolean hasNext() { | ||
return chunkedPackHasNext(nativePtr); | ||
} | ||
|
||
/** | ||
* Place the next contiguous chunk of our table into `userPtr`. | ||
* | ||
* This method throws if `hasNext` is false. | ||
* @param userPtr the bounce buffer to use for this iteration | ||
* @return the number of bytes that we were able to place in `userPtr`. This is | ||
* at most `userPtr.getLength()`. | ||
*/ | ||
public long next(DeviceMemoryBuffer userPtr) { | ||
return chunkedPackNext(nativePtr, userPtr.getAddress(), userPtr.getLength()); | ||
} | ||
|
||
/** | ||
* Generates opaque table metadata that can be unpacked via `cudf::unpack` | ||
* at a later time. | ||
* @return a `PackedColumnMetadata` instance referencing cuDF packed table metadata | ||
*/ | ||
public PackedColumnMetadata buildMetadata() { | ||
return new PackedColumnMetadata(chunkedPackBuildMetadata(nativePtr)); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
chunkedPackDelete(nativePtr); | ||
} | ||
|
||
private static native long chunkedPackGetTotalContiguousSize(long nativePtr); | ||
private static native boolean chunkedPackHasNext(long nativePtr); | ||
private static native long chunkedPackNext(long nativePtr, long userPtr, long userPtrSize); | ||
private static native long chunkedPackBuildMetadata(long nativePtr); | ||
private static native void chunkedPackDelete(long nativePtr); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
java/src/main/java/ai/rapids/cudf/PackedColumnMetadata.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
package ai.rapids.cudf; | ||
|
||
import java.nio.ByteBuffer; | ||
|
||
/**
 * Host-side metadata describing a table that lives in a single contiguous device buffer.
 */
public final class PackedColumnMetadata implements AutoCloseable {
  // Handle to the native metadata instance; 0 means there is nothing left to release.
  private long metadataHandle;
  // Lazily created direct buffer over the native metadata; null until first requested.
  private ByteBuffer metadataBuffer;

  // Entry point used from the JNI side to wrap a native metadata handle.
  static PackedColumnMetadata fromPackedColumnMeta(long metadataHandle) {
    return new PackedColumnMetadata(metadataHandle);
  }

  /**
   * Wrap an existing native metadata handle.
   * @param metadataHandle address of the cudf packed_table host-based metadata instance
   */
  PackedColumnMetadata(long metadataHandle) {
    this.metadataHandle = metadataHandle;
  }

  /**
   * Return a read-only view of the host metadata that describes the schema and layout of
   * the contiguous table.
   * <p>
   * NOTE: The returned buffer is a direct byte buffer backed by the native metadata
   * instance, so it may only be used while this PackedColumnMetadata instance is valid.
   * Holding on to the buffer and touching it after this instance has been destroyed is
   * undefined behavior and may cause segmentation faults or data corruption.
   */
  public ByteBuffer getMetadataDirectBuffer() {
    ByteBuffer direct = metadataBuffer;
    if (direct == null) {
      // First request: create the direct buffer once and remember it for later calls.
      direct = createMetadataDirectBuffer(metadataHandle);
      metadataBuffer = direct;
    }
    return direct.asReadOnlyBuffer();
  }

  /** Release the native metadata, if it has not been released already. */
  @Override
  public void close() {
    if (metadataHandle == 0) {
      return;
    }
    closeMetadata(metadataHandle);
    metadataHandle = 0;
  }

  // Native: build a DirectByteBuffer over the packed metadata.
  private static native ByteBuffer createMetadataDirectBuffer(long metadataHandle);

  // Native: free the metadata resources that belong to a packed table.
  private static native void closeMetadata(long metadataHandle);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "cudf_jni_apis.hpp" | ||
|
||
extern "C" { | ||
/**
 * Native half of `ChunkedPack.chunkedPackDelete`: deletes the `cudf::chunked_pack`
 * whose address is carried in `chunked_pack`.
 *
 * @param env          JNI environment, forwarded to `auto_set_device` and `CATCH_STD`
 * @param chunked_pack address of the `cudf::chunked_pack` to delete
 */
JNIEXPORT void JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackDelete(JNIEnv *env, jclass,
                                                                         jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    delete reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
  }
  // NOTE(review): CATCH_STD presumably turns C++ exceptions into Java exceptions - confirm
  CATCH_STD(env, );
}
|
||
/**
 * Native half of `ChunkedPack.chunkedPackGetTotalContiguousSize`.
 *
 * @param env          JNI environment, forwarded to `auto_set_device` and `CATCH_STD`
 * @param chunked_pack address of a `cudf::chunked_pack`
 * @return the value of `get_total_contiguous_size()` for that object, or 0 via `CATCH_STD`
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackGetTotalContiguousSize(
    JNIEnv *env, jclass, jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    auto *packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    auto const total_size = packer->get_total_contiguous_size();
    return static_cast<jlong>(total_size);
  }
  CATCH_STD(env, 0);
}
|
||
/**
 * Native half of `ChunkedPack.chunkedPackHasNext`.
 *
 * @param env          JNI environment, forwarded to `auto_set_device` and `CATCH_STD`
 * @param chunked_pack address of a `cudf::chunked_pack`
 * @return the result of `has_next()` on that object, or 0 via `CATCH_STD`
 */
JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackHasNext(JNIEnv *env, jclass,
                                                                              jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    auto *packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    auto const more_chunks = packer->has_next();
    return static_cast<jboolean>(more_chunks);
  }
  CATCH_STD(env, 0);
}
|
||
/**
 * Native half of `ChunkedPack.chunkedPackNext`: wraps the caller-provided buffer in a
 * `cudf::device_span<uint8_t>` and hands it to `cudf::chunked_pack::next`.
 *
 * @param env           JNI environment, forwarded to `auto_set_device` and `CATCH_STD`
 * @param chunked_pack  address of a `cudf::chunked_pack`
 * @param user_ptr      address of the buffer that `next` is given
 * @param user_ptr_size size of that buffer, converted to `std::size_t` for the span
 * @return the value returned by `next`, or 0 via `CATCH_STD`
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ChunkedPack_chunkedPackNext(JNIEnv *env, jclass,
                                                                        jlong chunked_pack,
                                                                        jlong user_ptr,
                                                                        jlong user_ptr_size) {
  try {
    cudf::jni::auto_set_device(env);
    auto *bounce_data = reinterpret_cast<uint8_t *>(user_ptr);
    auto const bounce_size = static_cast<std::size_t>(user_ptr_size);
    cudf::device_span<uint8_t> bounce_buffer(bounce_data, bounce_size);
    auto *packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    return packer->next(bounce_buffer);
  }
  CATCH_STD(env, 0);
}
|
||
/**
 * Native half of `ChunkedPack.chunkedPackBuildMetadata`.
 *
 * Calls `build_metadata()` and releases the resulting `std::vector<uint8_t>` from its
 * `std::unique_ptr`, so the returned handle is the only remaining reference to it.
 *
 * @param env          JNI environment, forwarded to `auto_set_device` and `CATCH_STD`
 * @param chunked_pack address of a `cudf::chunked_pack`
 * @return address of the released metadata vector as a `jlong`, or 0 via `CATCH_STD`
 */
JNIEXPORT jlong JNICALL
Java_ai_rapids_cudf_ChunkedPack_chunkedPackBuildMetadata(JNIEnv *env, jclass, jlong chunked_pack) {
  try {
    cudf::jni::auto_set_device(env);
    auto *packer = reinterpret_cast<cudf::chunked_pack *>(chunked_pack);
    std::unique_ptr<std::vector<uint8_t>> metadata(packer->build_metadata());
    // Give up ownership here; the raw pointer travels back to the caller as a handle.
    std::vector<uint8_t> *released = metadata.release();
    return reinterpret_cast<jlong>(released);
  }
  CATCH_STD(env, 0);
}
|
||
} // extern "C" |
Oops, something went wrong.