Skip to content

Commit

Permalink
Ensure columns have valid null counts in CUDF JNI. (#13355)
Browse files Browse the repository at this point in the history
Fixes #13353.
Depends on #13345.
    
In preparation for #11968, this change ensures that columns constructed from CUDF JNI do not have their null counts set to `UNKNOWN_NULL_COUNT` (i.e. `-1`). In cases where the caller invokes JNI functions with `UNKNOWN_NULL_COUNT`, the JNI layer computes the concrete null count from the validity mask, and sets this value in the column.

The current Java API remains unchanged; there should be no impact on user code.
    
The ability to pass an optional null count through the Java API will likely be removed at a later date.
    
Signed-off-by: MithunR <[email protected]>

Authors:
  - MithunR (https://github.com/mythrocks)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Nghia Truong (https://github.com/ttnghia)

URL: #13355
  • Loading branch information
mythrocks authored May 18, 2023
1 parent e4d2a23 commit 12060af
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 9 deletions.
10 changes: 5 additions & 5 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -205,16 +205,16 @@ private static long getColumnViewFromColumn(long nativePointer) {
}
}

// NOTE(review): this is a rendered diff hunk. The signature and the final argument
// line each appear twice; the first copy appears to be the pre-change text and the
// second the post-change text, differing only in the parameter names
// (rows -> numRows, nc -> nullCount).
static long initViewHandle(DType type, int rows, int nc,
BaseDeviceMemoryBuffer dataBuffer,
BaseDeviceMemoryBuffer validityBuffer,
BaseDeviceMemoryBuffer offsetBuffer, long[] childHandles) {
static long initViewHandle(DType type, int numRows, int nullCount,
BaseDeviceMemoryBuffer dataBuffer,
BaseDeviceMemoryBuffer validityBuffer,
BaseDeviceMemoryBuffer offsetBuffer, long[] childHandles) {
// A null buffer is forwarded to makeCudfColumnView as address 0
// (and, for the data buffer, length 0).
long cd = dataBuffer == null ? 0 : dataBuffer.address;
long cdSize = dataBuffer == null ? 0 : dataBuffer.length;
long od = offsetBuffer == null ? 0 : offsetBuffer.address;
long vd = validityBuffer == null ? 0 : validityBuffer.address;
// The type is passed as its native id plus scale; the null count precedes the row count.
return makeCudfColumnView(type.typeId.getNativeId(), type.getScale(), cd, cdSize,
od, vd, nc, rows, childHandles);
od, vd, nullCount, numRows, childHandles);
}

static ColumnVector fromViewWithContiguousAllocation(long columnViewAddress, DeviceMemoryBuffer buffer) {
Expand Down
5 changes: 5 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_makeCudfColumnView(
j_null_count = 0;
}

if (j_null_count < 0) { // Check for unknown null count.
// Calculate concrete null count.
j_null_count = cudf::null_count(valid, 0, size);
}

if (n_type == cudf::type_id::STRING) {
if (size == 0) {
return ptr_as_jlong(
Expand Down
9 changes: 5 additions & 4 deletions java/src/main/native/src/row_conversion.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2257,11 +2257,12 @@ std::unique_ptr<table> convert_from_rows(lists_column_view const &input,
for (int i = 0; i < static_cast<int>(schema.size()); ++i) {
if (schema[i].id() == type_id::STRING) {
// stuff real string column
auto const null_count = string_row_offset_columns[string_idx]->null_count();
auto string_data = string_row_offset_columns[string_idx].release()->release();
output_columns[i] = make_strings_column(num_rows, std::move(string_col_offsets[string_idx]),
std::move(string_data_cols[string_idx]),
std::move(*string_data.null_mask.release()),
cudf::UNKNOWN_NULL_COUNT);
output_columns[i] =
make_strings_column(num_rows, std::move(string_col_offsets[string_idx]),
std::move(string_data_cols[string_idx]),
std::move(*string_data.null_mask.release()), null_count);
string_idx++;
}
}
Expand Down
17 changes: 17 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6749,4 +6749,21 @@ public void testEventHandlerIsNotCalledIfNotSet() {
}
assertEquals(0, onClosedWasCalled.get());
}

/**
 * Verifies that a ColumnView constructed without an explicit null count
 * (Optional.empty()) reports the same null count as the ColumnVector
 * whose device buffers it wraps.
 */
@Test
public void testColumnViewNullCount() {
  try (ColumnVector source =
           ColumnVector.fromBoxedInts(1, 2, null, 3, null, 4, null, 5, null, 6)) {
    try (ColumnView unknownCountView = new ColumnView(
             DType.INT32,
             source.getRowCount(),
             Optional.empty(), // Null count deliberately left unspecified.
             source.getDeviceBufferFor(BufferType.DATA),
             source.getDeviceBufferFor(BufferType.VALIDITY),
             source.getDeviceBufferFor(BufferType.OFFSET))) {
      assertEquals(source.getNullCount(), unknownCountView.getNullCount());
    }
  }
}
}

0 comments on commit 12060af

Please sign in to comment.