From 0f72ac797f2e4b2d30f87d03ed3db042e33d4a51 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 8 Feb 2023 20:23:37 -0800 Subject: [PATCH 1/9] Added Jni methods for detecting and purging non-empty nulls --- .../main/java/ai/rapids/cudf/ColumnView.java | 59 +++++++++++++++++ java/src/main/native/src/ColumnViewJni.cpp | 42 ++++++++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 64 +++++++++++++++++++ 3 files changed, 165 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index a28c01fc9c0..9a0545c146b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -4474,6 +4474,12 @@ static native long makeCudfColumnView(int type, int scale, long data, long dataS static native long applyBooleanMask(long arrayColumnView, long booleanMaskHandle) throws CudfException; + static native boolean hasNonEmptyNulls(long handle) throws CudfException; + + static native boolean mayHaveNonEmptyNulls(long handle) throws CudfException; + + static native long purgeNonEmptyNulls(long handle) throws CudfException; + /** * A utility class to create column vector like objects without refcounts and other APIs when * creating the device side vector from host side nested vectors. Eventually this can go away or @@ -4832,4 +4838,57 @@ public HostColumnVector copyToHost() { } } } + + /** + * Exact check if a column or its descendants have non-empty null rows + * + * @return Whether the column or its descendants have non-empty null rows + */ + public boolean hasNonEmptyNulls() { + assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; + return hasNonEmptyNulls(viewHandle); + } + + /** + * Approximates if a column or its descendants *may* have non-empty null elements + * Compared to the exact `has_nonempty_nulls()` function, this function is typically + * more efficient. + * + * Complexity: + * - Best case: `O(count_descendants(input))` + * - Worst case: `O(count_descendants(input)) * m`, where `m` is the number of rows in the largest + * descendant + * + * @return true, if either the column or its descendants MIGHT have null rows + * false, if neither the column nor its descendants have null rows + */ + public boolean mayHaveNonEmptyNulls() { + assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; + return mayHaveNonEmptyNulls(viewHandle); + } + + /** + * Copies this column into output while purging any non-empty null rows in the column or its + * descendants. + * + * If this column is not of compound type (LIST/STRING/STRUCT/DICTIONARY), the output will be + * the same as input. + * + * The purge operation only applies directly to LIST and STRING columns, but it applies indirectly + * to STRUCT/DICTIONARY columns as well, since these columns may have child columns that + * are LIST or STRING. + * + * Examples: + * lists = data: [{{0,1}, {2,3}, {4,5}} validity: {true, false, true}] + * lists[1] is null, but the list's child column still stores `{2,3}`. + * + * After purging the contents of the list's null rows, the column's contents will be: + * lists = [data: {{0,1}, {4,5}} validity: {true, false, true}] + * + * @return A new column with equivalent contents to `input`, but with null rows purged + */ + public ColumnVector purgeNonEmptyNulls() { + assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; + return new ColumnVector(purgeNonEmptyNulls(viewHandle)); + } } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 8819d35ed44..821b26b2776 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2421,4 +2421,46 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_applyBooleanMask( CATCH_STD(env, 0); } +JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls( + JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { + cudf::jni::auto_set_device(env); + + cudf::column_view const *cv = + reinterpret_cast(column_view_handle); + + return cudf::has_nonempty_nulls(*cv); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_mayHaveNonEmptyNulls( + JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { + cudf::jni::auto_set_device(env); + + cudf::column_view const *cv = + reinterpret_cast(column_view_handle); + + return cudf::may_have_nonempty_nulls(*cv); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls( + JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { + cudf::jni::auto_set_device(env); + + cudf::column_view const *cv = + reinterpret_cast(column_view_handle); + + return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); + } + CATCH_STD(env, 0); +} + } // extern "C" diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 614561a8aa8..0bbb63891b3 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -6650,4 +6651,67 @@ void testApplyBooleanMaskFromListOfStructure() { assertColumnsAreEqual(expectedCv, actualCv); } } + + private ColumnView getColumnViewWithNonEmptyNulls() { + List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, null); + List list2 = Arrays.asList(7, 8, 9); + List list3 = null; + ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); + + // Modify the validity buffer + BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); + HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64); + newValidity.copyFromDeviceBuffer(dmb); + BitVectorHelper.setNullAt(newValidity, 1); + dmb.copyFromHostBuffer(newValidity); + + HostColumnVector hostColumnVector = input.copyToHost(); + assert(hostColumnVector.isNull(1)); + assert(hostColumnVector.isNull(3)); + + ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9); + ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); + ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, + input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); + assertEquals(2, colWithNonEmptyNulls.nullCount); + return colWithNonEmptyNulls; + } + + @Test + void testPurgeNonEmptyNullsList() { + ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); + // purge non-empty nulls + assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); + ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); + } + + /** + * Since ColumnView#mayHaveNonEmptyNulls is a non-deterministic method, this test is only here to + * make sure we have piped everything correctly. Cudf already should have tests for this + */ + @Test + void testMayHaveNonEmptyNulls() { + ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); + assertTrue(colWithNonEmptyNulls.mayHaveNonEmptyNulls() || + colWithNonEmptyNulls.hasNonEmptyNulls()); + } + + @Test + void testPurgeNonEmptyNullsStruct() { + ColumnView listCol = getColumnViewWithNonEmptyNulls(); + ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); + ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); + ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); + ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(newListChild.hasNonEmptyNulls()); + } } From 7abf38460ffeb3d69b12f2b300ccbbbbd8c0fa4a Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 9 Feb 2023 10:14:28 -0800 Subject: [PATCH 2/9] fix formatting --- java/src/main/native/src/ColumnViewJni.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 77217a39e54..70eee69f75f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2462,10 +2462,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls( JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); - - cudf::column_view const *cv = - reinterpret_cast(column_view_handle); - + cudf::column_view const *cv = reinterpret_cast(column_view_handle); return cudf::has_nonempty_nulls(*cv); } CATCH_STD(env, 0); @@ -2476,10 +2473,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_mayHaveNonEmptyNulls( JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); - - cudf::column_view const *cv = - reinterpret_cast(column_view_handle); - + cudf::column_view const *cv = reinterpret_cast(column_view_handle); return cudf::may_have_nonempty_nulls(*cv); } CATCH_STD(env, 0); @@ -2490,10 +2484,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls( JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); - - cudf::column_view const *cv = - reinterpret_cast(column_view_handle); - + cudf::column_view const *cv = reinterpret_cast(column_view_handle); return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); } CATCH_STD(env, 0); From 097a09a2101a237a5b937d9a69a1e436fc31b2e9 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 9 Feb 2023 15:38:58 -0800 Subject: [PATCH 3/9] addressed review comments --- .../main/java/ai/rapids/cudf/ColumnView.java | 20 ------------------- java/src/main/native/src/ColumnViewJni.cpp | 15 ++------------ .../java/ai/rapids/cudf/ColumnVectorTest.java | 11 ---------- 3 files changed, 2 insertions(+), 44 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index cffd69b348e..530d456eddc 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -5010,28 +5010,9 @@ public HostColumnVector copyToHost() { * @return Whether the column or its descendants have non-empty null rows */ public boolean hasNonEmptyNulls() { - assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; return hasNonEmptyNulls(viewHandle); } - /** - * Approximates if a column or its descendants *may* have non-empty null elements - * Compared to the exact `has_nonempty_nulls()` function, this function is typically - * more efficient. - * - * Complexity: - * - Best case: `O(count_descendants(input))` - * - Worst case: `O(count_descendants(input)) * m`, where `m` is the number of rows in the largest - * descendant - * - * @return true, if either the column or its descendants MIGHT have null rows - * false, if neither the column nor its descendants have null rows - */ - public boolean mayHaveNonEmptyNulls() { - assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; - return mayHaveNonEmptyNulls(viewHandle); - } - /** * Copies this column into output while purging any non-empty null rows in the column or its * descendants. @@ -5053,7 +5034,6 @@ public boolean mayHaveNonEmptyNulls() { * @return A new column with equivalent contents to `input`, but with null rows purged */ public ColumnVector purgeNonEmptyNulls() { - assert type.isNestedType() || type.hasOffsets() : "Columns should be a LIST, STRING or STRUCT"; return new ColumnVector(purgeNonEmptyNulls(viewHandle)); } } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 70eee69f75f..a51daf9f31d 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2462,29 +2462,18 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls( JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const *cv = reinterpret_cast(column_view_handle); + auto const *cv = reinterpret_cast(column_view_handle); return cudf::has_nonempty_nulls(*cv); } CATCH_STD(env, 0); } -JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_mayHaveNonEmptyNulls( - JNIEnv *env, jclass, jlong column_view_handle) { - JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); - try { - cudf::jni::auto_set_device(env); - cudf::column_view const *cv = reinterpret_cast(column_view_handle); - return cudf::may_have_nonempty_nulls(*cv); - } - CATCH_STD(env, 0); -} - JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls( JNIEnv *env, jclass, jlong column_view_handle) { JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const *cv = reinterpret_cast(column_view_handle); + auto const *cv = reinterpret_cast(column_view_handle); return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index be172bc0821..4b373122ac9 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6732,17 +6732,6 @@ void testPurgeNonEmptyNullsList() { assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); } - /** - * Since ColumnView#mayHaveNonEmptyNulls is a non-deterministic method, this test is only here to - * make sure we have piped everything correctly. Cudf already should have tests for this - */ - @Test - void testMayHaveNonEmptyNulls() { - ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); - assertTrue(colWithNonEmptyNulls.mayHaveNonEmptyNulls() || - colWithNonEmptyNulls.hasNonEmptyNulls()); - } - @Test void testPurgeNonEmptyNullsStruct() { ColumnView listCol = getColumnViewWithNonEmptyNulls(); From dac0b21a2381c9af35928ac4c02ec7b2ca45547a Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 9 Feb 2023 16:17:49 -0800 Subject: [PATCH 4/9] removed the unused native method --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 530d456eddc..84183819854 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -4641,8 +4641,6 @@ static native long makeCudfColumnView(int type, int scale, long data, long dataS static native boolean hasNonEmptyNulls(long handle) throws CudfException; - static native boolean mayHaveNonEmptyNulls(long handle) throws CudfException; - static native long purgeNonEmptyNulls(long handle) throws CudfException; /** From d9bd29a6e7a382564790492b6cebe75e4d1cefc9 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 9 Feb 2023 16:30:06 -0800 Subject: [PATCH 5/9] reformatted --- java/src/main/native/src/ColumnViewJni.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index a51daf9f31d..6ac9fd995bc 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2457,8 +2457,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_applyBooleanMask( CATCH_STD(env, 0); } -JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls( - JNIEnv *env, jclass, jlong column_view_handle) { +JNIEXPORT jboolean JNICALL +Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) { JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); try { cudf::jni::auto_set_device(env); @@ -2468,10 +2468,10 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls( CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls( - JNIEnv *env, jclass, jlong column_view_handle) { - JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); - try { +JNIEXPORT jlong JNICALL +Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { cudf::jni::auto_set_device(env); auto const *cv = reinterpret_cast(column_view_handle); return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); From 0a77f17571df7cf7032c879da837f1abd20a954e Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 13 Feb 2023 12:50:28 -0800 Subject: [PATCH 6/9] reformat --- java/src/main/native/src/ColumnViewJni.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 6ac9fd995bc..f2c361c5e8c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2470,8 +2470,8 @@ Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls(JNIEnv *env, jclass, jlong colum JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) { - JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); - try { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { cudf::jni::auto_set_device(env); auto const *cv = reinterpret_cast(column_view_handle); return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); From fbb5e490c0a7118ab1ae83d3eaff765ec01b0601 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Fri, 24 Feb 2023 10:07:21 -0800 Subject: [PATCH 7/9] closing the vectors --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index d2b4a987fc9..256363ae9ec 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6693,6 +6693,9 @@ void testApplyBooleanMaskFromListOfStructure() { } } + /** + * The caller needs to make sure to close the returned ColumnView + */ private ColumnView getColumnViewWithNonEmptyNulls() { List list0 = Arrays.asList(1, 2, 3); List list1 = Arrays.asList(4, 5, null); @@ -6722,26 +6725,28 @@ private ColumnView getColumnViewWithNonEmptyNulls() { @Test void testPurgeNonEmptyNullsList() { - ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); - // purge non-empty nulls - assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); - ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); - ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); - ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView(); - assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); - assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); + try (ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); + // purge non-empty nulls + ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) { + assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); + } } @Test void testPurgeNonEmptyNullsStruct() { - ColumnView listCol = getColumnViewWithNonEmptyNulls(); - ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); - ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); - ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); - ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); - ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); - ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView(); - assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); - assertFalse(newListChild.hasNonEmptyNulls()); + try (ColumnView listCol = getColumnViewWithNonEmptyNulls(); + ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); + ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); + ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); + ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) { + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(newListChild.hasNonEmptyNulls()); + } } } From 508b9e698cff6a2accd062ddd9b679b86d16af75 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Fri, 24 Feb 2023 17:20:44 -0800 Subject: [PATCH 8/9] release local allocation --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 256363ae9ec..9dac006214f 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6701,26 +6701,27 @@ private ColumnView getColumnViewWithNonEmptyNulls() { List list1 = Arrays.asList(4, 5, null); List list2 = Arrays.asList(7, 8, 9); List list3 = null; - ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); - - // Modify the validity buffer - BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); - HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64); - newValidity.copyFromDeviceBuffer(dmb); - BitVectorHelper.setNullAt(newValidity, 1); - dmb.copyFromHostBuffer(newValidity); - - HostColumnVector hostColumnVector = input.copyToHost(); - assert(hostColumnVector.isNull(1)); - assert(hostColumnVector.isNull(3)); - - ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9); - ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); - assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); - ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, - input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); - assertEquals(2, colWithNonEmptyNulls.nullCount); - return colWithNonEmptyNulls; + try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3)) { + // Modify the validity buffer + BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); + try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { + newValidity.copyFromDeviceBuffer(dmb); + BitVectorHelper.setNullAt(newValidity, 1); + dmb.copyFromHostBuffer(newValidity); + } + try (HostColumnVector hostColumnVector = input.copyToHost()) { + assert (hostColumnVector.isNull(1)); + assert (hostColumnVector.isNull(3)); + } + try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { + ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); + } + ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, + input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); + assertEquals(2, colWithNonEmptyNulls.nullCount); + return colWithNonEmptyNulls; + } } @Test From a8a11063911c9e699211cece20397c9432f207ac Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Fri, 24 Feb 2023 18:23:38 -0800 Subject: [PATCH 9/9] Return an Array of ColumnViews so they can be closed by the caller --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 9dac006214f..7848807dab8 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6696,37 +6696,38 @@ void testApplyBooleanMaskFromListOfStructure() { /** * The caller needs to make sure to close the returned ColumnView */ - private ColumnView getColumnViewWithNonEmptyNulls() { + private ColumnView[] getColumnViewWithNonEmptyNulls() { List list0 = Arrays.asList(1, 2, 3); List list1 = Arrays.asList(4, 5, null); List list2 = Arrays.asList(7, 8, 9); List list3 = null; - try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3)) { - // Modify the validity buffer - BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); - try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { - newValidity.copyFromDeviceBuffer(dmb); - BitVectorHelper.setNullAt(newValidity, 1); - dmb.copyFromHostBuffer(newValidity); - } - try (HostColumnVector hostColumnVector = input.copyToHost()) { - assert (hostColumnVector.isNull(1)); - assert (hostColumnVector.isNull(3)); - } - try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { - ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); - assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); - } - ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, - input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); - assertEquals(2, colWithNonEmptyNulls.nullCount); - return colWithNonEmptyNulls; + ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); + // Modify the validity buffer + BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); + try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { + newValidity.copyFromDeviceBuffer(dmb); + BitVectorHelper.setNullAt(newValidity, 1); + dmb.copyFromHostBuffer(newValidity); + } + try (HostColumnVector hostColumnVector = input.copyToHost()) { + assert (hostColumnVector.isNull(1)); + assert (hostColumnVector.isNull(3)); + } + try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { + ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); } + ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, + input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); + assertEquals(2, colWithNonEmptyNulls.nullCount); + return new ColumnView[]{input, colWithNonEmptyNulls}; } @Test void testPurgeNonEmptyNullsList() { - try (ColumnView colWithNonEmptyNulls = getColumnViewWithNonEmptyNulls(); + ColumnView[] values = getColumnViewWithNonEmptyNulls(); + try (ColumnView colWithNonEmptyNulls = values[1]; + ColumnView input = values[0]; // purge non-empty nulls ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); @@ -6739,7 +6740,9 @@ void testPurgeNonEmptyNullsList() { @Test void testPurgeNonEmptyNullsStruct() { - try (ColumnView listCol = getColumnViewWithNonEmptyNulls(); + ColumnView[] values = getColumnViewWithNonEmptyNulls(); + try (ColumnView listCol = values[1]; + ColumnView input = values[0]; ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls();