Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assert for non-empty nulls #13071

Merged
merged 20 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
<configuration>
<excludes>
<exclude>**/CudaFatalTest.java</exclude>
<exclude>**/ColumnViewNonEmptyNullsTest.java</exclude>
</excludes>
</configuration>
<executions>
Expand All @@ -201,15 +202,22 @@
<goal>test</goal>
</goals>
</execution>
<execution>
<id>non-empty-null-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<argLine>-da:ai.rapids.cudf.AssertEmptyNulls</argLine>
<test>*/ColumnViewNonEmptyNullsTest.java</test>
</configuration>
</execution>
<execution>
<id>fatal-cuda-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<includes>
<include>**/CudaFatalTest.java</include>
</includes>
<reuseForks>false</reuseForks>
<test>*/CudaFatalTest.java</test>
</configuration>
Expand All @@ -233,6 +241,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ColumnViewNonEmptyNullsTest.java</exclude>
<exclude>**/CuFileTest.java</exclude>
<exclude>**/CudaFatalTest.java</exclude>
</excludes>
Expand All @@ -250,13 +259,20 @@
<goal>test</goal>
</goals>
<configuration>
<includes>
<include>**/CudaFatalTest.java</include>
</includes>
<reuseForks>false</reuseForks>
<test>*/CudaFatalTest.java</test>
</configuration>
</execution>
<execution>
<id>non-empty-null-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<argLine>-da:ai.rapids.cudf.AssertEmptyNulls</argLine>
<test>*/ColumnViewNonEmptyNullsTest.java</test>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
Expand Down
36 changes: 36 additions & 0 deletions java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

/**
* This class is a Helper class to assert there are no non-empty nulls in a ColumnView
*
* The reason for the existence of this class is so that we can turn the asserts on/off when needed
* by passing "-da:ai.rapids.cudf.AssertEmptyNulls". We need that behavior because we have tests
* that explicitly test with ColumnViews that contain non-empty nulls but more importantly, there
revans2 marked this conversation as resolved.
Show resolved Hide resolved
* could be cases where an external system may not have a requirement of nulls being empty, so for
* us to work with those systems, we can turn off this assert in the field.
*/
public class AssertEmptyNulls {
public static void assertNullsAreEmpty(ColumnView cv) {
revans2 marked this conversation as resolved.
Show resolved Hide resolved
if (cv.type.isNestedType() || cv.type.hasOffsets()) {
assert !cv.hasNonEmptyNulls() : "Column has non-empty nulls";
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
2 changes: 2 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
this.rows = ColumnView.getNativeRowCount(viewHandle);
this.nullCount = ColumnView.getNativeNullCount(viewHandle);
this.offHeap = null;
AssertEmptyNulls.assertNullsAreEmpty(this);
}


Expand All @@ -67,6 +68,7 @@ protected ColumnView(ColumnVector.OffHeapState state) {
type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle));
rows = ColumnView.getNativeRowCount(viewHandle);
nullCount = ColumnView.getNativeNullCount(viewHandle);
AssertEmptyNulls.assertNullsAreEmpty(this);
}

/**
Expand Down
100 changes: 1 addition & 99 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -740,43 +740,6 @@ void testSpark32BitMurmur3HashListsAndNestedLists() {
}
}

/**
 * Merges validity with BITWISE_AND for an int column and a string column, and checks the
 * no-mask call against its own input.
 */
@Test
void testAndNullReconfigureNulls() {
  final BinaryOp and = BinaryOp.BITWISE_AND;
  try (ColumnVector sparseInts = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
       ColumnVector denseInts = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
       ColumnVector mergedInts = denseInts.mergeAndSetValidity(and, sparseInts);
       ColumnVector strings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
       ColumnVector mergedStrings = strings.mergeAndSetValidity(and, sparseInts, denseInts);
       ColumnVector expectedStrings = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
       ColumnVector mergedNoMask = strings.mergeAndSetValidity(and)) {
    // The int result is compared with the sparser of the two int inputs.
    assertColumnsAreEqual(sparseInts, mergedInts);
    assertColumnsAreEqual(expectedStrings, mergedStrings);
    // With no mask columns supplied the result is compared with the input itself.
    assertColumnsAreEqual(strings, mergedNoMask);
  }
}

/**
 * Merges validity with BITWISE_OR over several combinations of mask columns, for int and
 * string inputs, and checks the no-mask call against its own input.
 */
@Test
void testOrNullReconfigureNulls() {
  final BinaryOp or = BinaryOp.BITWISE_OR;
  try (ColumnVector threeNulls = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
       ColumnVector oneNull = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
       ColumnVector noNulls = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
       ColumnVector orWithThreeNulls = oneNull.mergeAndSetValidity(or, threeNulls);
       ColumnVector orWithBoth = oneNull.mergeAndSetValidity(or, threeNulls, oneNull);
       ColumnVector orWithMany = oneNull.mergeAndSetValidity(or,
           threeNulls, threeNulls, oneNull, oneNull, threeNulls, oneNull, threeNulls);
       ColumnVector orWithAll = noNulls.mergeAndSetValidity(or, threeNulls, oneNull, noNulls);
       ColumnVector strings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
       ColumnVector mergedStrings = strings.mergeAndSetValidity(or, threeNulls, oneNull);
       ColumnVector expectedStrings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
       ColumnVector mergedNoMask = strings.mergeAndSetValidity(or)) {
    assertColumnsAreEqual(threeNulls, orWithThreeNulls);
    assertColumnsAreEqual(oneNull, orWithBoth);
    assertColumnsAreEqual(oneNull, orWithMany);
    assertColumnsAreEqual(noNulls, orWithAll);
    assertColumnsAreEqual(expectedStrings, mergedStrings);
    // With no mask columns supplied the result is compared with the input itself.
    assertColumnsAreEqual(strings, mergedNoMask);
  }
}

@Test
void isNotNullTestEmptyColumn() {
try (ColumnVector v = ColumnVector.fromBoxedInts();
Expand Down Expand Up @@ -4635,7 +4598,7 @@ void testDropListDuplicatesWithKeysValuesNullable() {
}

@SafeVarargs
private static <T> ColumnVector makeListsColumn(DType childDType, List<T>... rows) {
public static <T> ColumnVector makeListsColumn(DType childDType, List<T>... rows) {
HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType);
HostColumnVector.DataType listType = new HostColumnVector.ListType(true, childType);
return ColumnVector.fromLists(listType, rows);
Expand Down Expand Up @@ -6713,65 +6676,4 @@ void testApplyBooleanMaskFromListOfStructure() {
assertColumnsAreEqual(expectedCv, actualCv);
}
}

/**
 * Builds a LIST&lt;INT32&gt; column and a second view over the same buffers in which row 1 is
 * marked null while its offsets still span three child elements, i.e. a non-empty null.
 *
 * The caller needs to make sure to close both returned ColumnViews.
 *
 * @return a two element array: index 0 is the backing input column, index 1 is the view
 *         that contains non-empty nulls
 */
private ColumnView[] getColumnViewWithNonEmptyNulls() {
  List<Integer> list0 = Arrays.asList(1, 2, 3);
  List<Integer> list1 = Arrays.asList(4, 5, null);
  List<Integer> list2 = Arrays.asList(7, 8, 9);
  List<Integer> list3 = null;
  ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3);
  // Modify the validity buffer in place so that row 1 becomes null without touching offsets
  BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
  try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
    newValidity.copyFromDeviceBuffer(dmb);
    BitVectorHelper.setNullAt(newValidity, 1);
    dmb.copyFromHostBuffer(newValidity);
  }
  try (HostColumnVector hostColumnVector = input.copyToHost()) {
    // JUnit assertions so the checks run regardless of the JVM's -ea/-da flags
    assertTrue(hostColumnVector.isNull(1));
    assertTrue(hostColumnVector.isNull(3));
  }
  // The offsets view is closed here as well; previously it was never closed
  try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9);
       ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) {
    assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
  }
  ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
      input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
  assertEquals(2, colWithNonEmptyNulls.nullCount);
  return new ColumnView[]{input, colWithNonEmptyNulls};
}

/**
 * Purging a list column that has non-empty nulls must collapse the offsets of the null rows.
 */
@Test
void testPurgeNonEmptyNullsList() {
  ColumnView[] views = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirty = views[1];
       // not referenced below; listed so that it is closed together with the rest
       ColumnView input = views[0];
       ColumnView purged = dirty.purgeNonEmptyNulls();
       ColumnVector expectedOffsets = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView purgedOffsets = purged.getListOffsetsView()) {
    assertTrue(dirty.hasNonEmptyNulls());
    assertColumnsAreEqual(expectedOffsets, purgedOffsets);
    assertFalse(purged.hasNonEmptyNulls());
  }
}

/**
 * Purging a struct whose list child has non-empty nulls must collapse the offsets of the
 * null rows in that child.
 */
@Test
void testPurgeNonEmptyNullsStruct() {
  ColumnView[] views = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirtyList = views[1];
       // not referenced below; listed so that it is closed together with the rest
       ColumnView input = views[0];
       ColumnView strings = ColumnVector.fromStrings("A", "col", "of", "Strings");
       ColumnView struct = ColumnView.makeStructView(strings, dirtyList);
       ColumnView purgedStruct = struct.purgeNonEmptyNulls();
       // child 1 is the list column that was passed second to makeStructView
       ColumnView purgedList = purgedStruct.getChildColumnView(1);
       ColumnVector expectedOffsets = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView purgedOffsets = purgedList.getListOffsetsView()) {
    assertColumnsAreEqual(expectedOffsets, purgedOffsets);
    assertFalse(purgedList.hasNonEmptyNulls());
  }
}
}
134 changes: 134 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * This class will house only tests that need to explicitly set non-empty nulls.
 *
 * The build runs this class in its own surefire execution with
 * "-da:ai.rapids.cudf.AssertEmptyNulls", so the non-empty-null assertion made when a
 * ColumnView is constructed does not fire for the columns built here.
 */
public class ColumnViewNonEmptyNullsTest extends CudfTestBase {

  /**
   * Merges validity with BITWISE_AND for an int column and a string column, and checks the
   * no-mask call against its own input.
   */
  @Test
  void testAndNullReconfigureNulls() {
    try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
         ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
         ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
         ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
         ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1);
         ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
         ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) {
      assertColumnsAreEqual(v0, intResult);
      assertColumnsAreEqual(stringExpected, stringResult);
      assertColumnsAreEqual(v2, noMaskResult);
    }
  }

  /**
   * Merges validity with BITWISE_OR over several combinations of mask columns, for int and
   * string inputs, and checks the no-mask call against its own input.
   */
  @Test
  void testOrNullReconfigureNulls() {
    try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
         ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
         ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
         ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
         ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
         ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
         ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2);
         ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
         ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
         ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
         ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
      assertColumnsAreEqual(v0, intResultV0);
      assertColumnsAreEqual(v1, intResultV0V1);
      assertColumnsAreEqual(v1, intResultMulti);
      assertColumnsAreEqual(v2, intResultv0v1v2);
      assertColumnsAreEqual(stringExpected, stringResult);
      assertColumnsAreEqual(v3, noMaskResult);
    }
  }

  /**
   * Builds a LIST&lt;INT32&gt; column and a second view over the same buffers in which row 1 is
   * marked null while its offsets still span three child elements, i.e. a non-empty null.
   *
   * The caller needs to make sure to close both returned ColumnViews.
   *
   * @return a two element array: index 0 is the backing input column, index 1 is the view
   *         that contains non-empty nulls
   */
  private ColumnView[] getColumnViewWithNonEmptyNulls() {
    List<Integer> list0 = Arrays.asList(1, 2, 3);
    List<Integer> list1 = Arrays.asList(4, 5, null);
    List<Integer> list2 = Arrays.asList(7, 8, 9);
    List<Integer> list3 = null;
    ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3);
    // Modify the validity buffer in place so that row 1 becomes null without touching offsets
    BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
    try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
      newValidity.copyFromDeviceBuffer(dmb);
      BitVectorHelper.setNullAt(newValidity, 1);
      dmb.copyFromHostBuffer(newValidity);
    }
    try (HostColumnVector hostColumnVector = input.copyToHost()) {
      // JUnit assertions so the checks run regardless of the JVM's -ea/-da flags
      assertTrue(hostColumnVector.isNull(1));
      assertTrue(hostColumnVector.isNull(3));
    }
    // The offsets view is closed here as well; previously it was never closed
    try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9);
         ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) {
      assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
    }
    ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
        input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
    assertEquals(2, colWithNonEmptyNulls.nullCount);
    return new ColumnView[]{input, colWithNonEmptyNulls};
  }

  /**
   * Purging a list column that has non-empty nulls must collapse the offsets of the null rows.
   */
  @Test
  void testPurgeNonEmptyNullsList() {
    ColumnView[] values = getColumnViewWithNonEmptyNulls();
    try (ColumnView colWithNonEmptyNulls = values[1];
         // not referenced below; listed so that it is closed together with the rest
         ColumnView input = values[0];
         // purge non-empty nulls
         ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls();
         ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
         ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) {
      assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls());
      assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
      assertFalse(colWithEmptyNulls.hasNonEmptyNulls());
    }
  }

  /**
   * Purging a struct whose list child has non-empty nulls must collapse the offsets of the
   * null rows in that child.
   */
  @Test
  void testPurgeNonEmptyNullsStruct() {
    ColumnView[] values = getColumnViewWithNonEmptyNulls();
    try (ColumnView listCol = values[1];
         // not referenced below; listed so that it is closed together with the rest
         ColumnView input = values[0];
         ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings");
         ColumnView structView = ColumnView.makeStructView(stringsCol, listCol);
         ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls();
         // child 1 is the list column that was passed second to makeStructView
         ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1);
         ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
         ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) {
      assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
      assertFalse(newListChild.hasNonEmptyNulls());
    }
  }
}