Skip to content

Commit

Permalink
Add JNI methods for detecting and purging non-empty nulls from LIST and STRUCT (#12742)
Browse files Browse the repository at this point in the history

This PR adds methods for detecting and purging non-empty nulls.

Authors:
  - Raza Jafri (https://github.com/razajafri)
  - Nghia Truong (https://github.com/ttnghia)
  - AJ Schmidt (https://github.com/ajschmidt8)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)

URL: #12742
  • Loading branch information
razajafri authored Feb 27, 2023
1 parent 173459e commit ac1cac6
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 0 deletions.
37 changes: 37 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -4639,6 +4639,10 @@ static native long makeCudfColumnView(int type, int scale, long data, long dataS

static native long applyBooleanMask(long arrayColumnView, long booleanMaskHandle) throws CudfException;

/**
 * Native check for non-empty nulls. The JNI implementation reinterprets the handle as a pointer
 * to a cudf::column_view and passes that view to cudf::has_nonempty_nulls.
 *
 * @param handle address of the native column view; the JNI side null-checks it before use
 * @return the result of cudf::has_nonempty_nulls for that view
 * @throws CudfException presumably raised when the native call fails -- TODO confirm
 */
static native boolean hasNonEmptyNulls(long handle) throws CudfException;

/**
 * Native purge of non-empty nulls. The JNI implementation reinterprets the handle as a pointer
 * to a cudf::column_view, calls cudf::purge_nonempty_nulls on it and releases the result to the
 * caller as a long.
 *
 * @param handle address of the native column view; the JNI side null-checks it before use
 * @return handle of the column produced by cudf::purge_nonempty_nulls
 * @throws CudfException presumably raised when the native call fails -- TODO confirm
 */
static native long purgeNonEmptyNulls(long handle) throws CudfException;

/**
* A utility class to create column vector like objects without refcounts and other APIs when
* creating the device side vector from host side nested vectors. Eventually this can go away or
Expand Down Expand Up @@ -4997,4 +5001,37 @@ public HostColumnVector copyToHost() {
}
}
}

/**
 * Reports whether this column, or any column nested beneath it, contains a null row that is
 * not empty. This is an exact check.
 *
 * @return {@code true} if this column or one of its descendants has a non-empty null row,
 *         {@code false} otherwise
 */
public boolean hasNonEmptyNulls() {
  // Delegate to the native check using this view's handle.
  final boolean foundNonEmptyNulls = hasNonEmptyNulls(viewHandle);
  return foundNonEmptyNulls;
}

/**
 * Produces a copy of this column in which every non-empty null row, in this column or in any
 * of its descendants, has been purged.
 *
 * For a column that is not of compound type (LIST/STRING/STRUCT/DICTIONARY) the output is the
 * same as this column.
 *
 * Purging applies directly only to LIST and STRING columns. STRUCT and DICTIONARY columns are
 * affected indirectly, because their child columns may themselves be LIST or STRING.
 *
 * Example:
 *   lists = [data: {{0,1}, {2,3}, {4,5}} validity: {true, false, true}]
 *   lists[1] is null, but the list's child column still stores `{2,3}`.
 *
 *   After purging the contents of the list's null rows, the column's contents will be:
 *   lists = [data: {{0,1}, {4,5}} validity: {true, false, true}]
 *
 * @return A new column with contents equivalent to this column, but with null rows purged.
 *         It is a newly constructed ColumnVector.
 */
public ColumnVector purgeNonEmptyNulls() {
  // The native call hands back the handle of the purged column; wrap it in a ColumnVector.
  final long purgedHandle = purgeNonEmptyNulls(viewHandle);
  return new ColumnVector(purgedHandle);
}
}
22 changes: 22 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2457,4 +2457,26 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_applyBooleanMask(
CATCH_STD(env, 0);
}

// JNI binding for ai.rapids.cudf.ColumnView.hasNonEmptyNulls(long).
// Treats the handle as a cudf::column_view pointer and returns what
// cudf::has_nonempty_nulls reports for that view. On a null handle or a caught
// exception the macros are given 0 as the value to return.
JNIEXPORT jboolean JNICALL
Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) {
  JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0);
  try {
    cudf::jni::auto_set_device(env);
    auto const &view = *reinterpret_cast<cudf::column_view const *>(column_view_handle);
    bool const found = cudf::has_nonempty_nulls(view);
    return found;
  }
  CATCH_STD(env, 0);
}

// JNI binding for ai.rapids.cudf.ColumnView.purgeNonEmptyNulls(long).
// Treats the handle as a cudf::column_view pointer, runs cudf::purge_nonempty_nulls
// on that view and releases the result to the Java side as a jlong. On a null
// handle or a caught exception the macros are given 0 as the value to return.
JNIEXPORT jlong JNICALL
Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) {
  JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0);
  try {
    cudf::jni::auto_set_device(env);
    auto const &view = *reinterpret_cast<cudf::column_view const *>(column_view_handle);
    return release_as_jlong(cudf::purge_nonempty_nulls(view));
  }
  CATCH_STD(env, 0);
}

} // extern "C"
62 changes: 62 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
Expand Down Expand Up @@ -6691,4 +6692,65 @@ void testApplyBooleanMaskFromListOfStructure() {
assertColumnsAreEqual(expectedCv, actualCv);
}
}

/**
 * Builds a LIST of INT32 column fixture in which a null row still owns child data.
 *
 * Four rows are created, the last one null. The validity buffer is then edited on the device
 * so that row 1 is also null, while the offsets (0, 3, 6, 9, 9) still give row 1 three child
 * elements. A ColumnView with a null count of 2 is then constructed over the same validity
 * and offset buffers and child views.
 *
 * The caller needs to make sure to close BOTH returned objects. If a sanity check in this
 * method fails, everything created here is closed before the failure propagates.
 *
 * @return a two element array: index 0 is the backing ColumnVector, index 1 is the ColumnView
 *         that reports the non-empty null
 */
private ColumnView[] getColumnViewWithNonEmptyNulls() {
  List<Integer> list0 = Arrays.asList(1, 2, 3);
  List<Integer> list1 = Arrays.asList(4, 5, null);
  List<Integer> list2 = Arrays.asList(7, 8, 9);
  List<Integer> list3 = null;
  ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3);
  ColumnView colWithNonEmptyNulls = null;
  boolean success = false;
  try {
    // Modify the validity buffer: round-trip it through the host and mark row 1 as null.
    BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
    try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
      newValidity.copyFromDeviceBuffer(dmb);
      BitVectorHelper.setNullAt(newValidity, 1);
      dmb.copyFromHostBuffer(newValidity);
    }
    // JUnit assertions are used instead of the `assert` keyword so that the checks run even
    // when the JVM is started without -ea.
    try (HostColumnVector hostColumnVector = input.copyToHost()) {
      assertTrue(hostColumnVector.isNull(1));
      assertTrue(hostColumnVector.isNull(3));
    }
    // Row 1 must still span offsets 3..6, which is what makes it a non-empty null.
    try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9);
         ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) {
      assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
    }
    colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
        input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
    assertEquals(2, colWithNonEmptyNulls.nullCount);
    success = true;
    return new ColumnView[]{input, colWithNonEmptyNulls};
  } finally {
    // Ownership only passes to the caller on success; otherwise release what was created.
    if (!success) {
      if (colWithNonEmptyNulls != null) {
        colWithNonEmptyNulls.close();
      }
      input.close();
    }
  }
}

/**
 * Purging a LIST column that has a non-empty null must leave offsets of (0, 3, 3, 6, 6) and a
 * result that no longer reports non-empty nulls.
 */
@Test
void testPurgeNonEmptyNullsList() {
  // Index 0 is the backing vector, index 1 is the view whose null row still carries data.
  ColumnView[] fixture = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirtyLists = fixture[1];
       ColumnView backing = fixture[0];
       // purge non-empty nulls
       ColumnView purgedLists = dirtyLists.purgeNonEmptyNulls();
       ColumnVector wantOffsets = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView gotOffsets = purgedLists.getListOffsetsView()) {
    assertTrue(dirtyLists.hasNonEmptyNulls());
    assertColumnsAreEqual(wantOffsets, gotOffsets);
    assertFalse(purgedLists.hasNonEmptyNulls());
  }
}

/**
 * Purging a STRUCT view whose second child is a LIST column with a non-empty null must leave
 * that list child with offsets of (0, 3, 3, 6, 6) and without non-empty nulls.
 */
@Test
void testPurgeNonEmptyNullsStruct() {
  // Index 0 is the backing vector, index 1 is the list view whose null row still carries data.
  ColumnView[] fixture = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirtyLists = fixture[1];
       ColumnView backing = fixture[0];
       ColumnView strings = ColumnVector.fromStrings("A", "col", "of", "Strings");
       ColumnView structOfBoth = ColumnView.makeStructView(strings, dirtyLists);
       ColumnView purgedStruct = structOfBoth.purgeNonEmptyNulls();
       // Child 1 of the purged struct corresponds to the list column passed in above.
       ColumnView purgedListChild = purgedStruct.getChildColumnView(1);
       ColumnVector wantOffsets = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView gotOffsets = purgedListChild.getListOffsetsView()) {
    assertColumnsAreEqual(wantOffsets, gotOffsets);
    assertFalse(purgedListChild.hasNonEmptyNulls());
  }
}
}

0 comments on commit ac1cac6

Please sign in to comment.