Skip to content

Commit

Permalink
Add JNI support for drop_list_duplicates (#9198)
Browse files Browse the repository at this point in the history
This adds JNI (Java) support for the libcudf `drop_list_duplicates` operation.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: #9198
  • Loading branch information
revans2 authored Sep 9, 2021
1 parent 473063f commit 794734c
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
6 changes: 6 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -2160,6 +2160,10 @@ public final ColumnVector extractListElement(int index) {
return new ColumnVector(extractListElement(getNativeView(), index));
}

/**
 * Create a new column in which duplicate elements have been removed from each list of
 * this column. The work is delegated to the native {@code dropListDuplicates} call.
 * <p>
 * The order of the elements that remain in each list is not guaranteed; sort the result
 * (for example with {@code listSortRows}) if a deterministic order is needed.
 * NOTE(review): presumably this column must be of a LIST type -- confirm against the
 * native implementation.
 *
 * @return a new column wrapping the handle returned by the native call
 */
public final ColumnVector dropListDuplicates() {
  long dedupedHandle = dropListDuplicates(getNativeView());
  return new ColumnVector(dedupedHandle);
}

/////////////////////////////////////////////////////////////////////////////
// STRINGS
/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -3489,6 +3493,8 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat

/**
 * Native method backing {@code extractListElement(int)}.
 * @param nativeView the native view handle of the column (callers pass getNativeView())
 * @param index the index forwarded from the public method; presumably the position of
 *              the element to pull out of each list -- TODO confirm against the JNI code
 * @return a native handle that the caller wraps in a new ColumnVector
 */
private static native long extractListElement(long nativeView, int index);

/**
 * Native method backing {@code dropListDuplicates()}. The JNI implementation views the
 * column as a lists column and calls {@code cudf::lists::drop_list_duplicates} on it.
 * @param nativeView the native view handle of the list column (callers pass getNativeView())
 * @return a native handle to the newly created column, which the caller wraps in a new
 *         ColumnVector
 */
private static native long dropListDuplicates(long nativeView);

/**
* Native method for list lookup
* @param nativeView the column view handle of the list
Expand Down
15 changes: 15 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cudf/lists/contains.hpp>
#include <cudf/lists/count_elements.hpp>
#include <cudf/lists/detail/concatenate.hpp>
#include <cudf/lists/drop_list_duplicates.hpp>
#include <cudf/lists/extract.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/lists/sorting.hpp>
Expand Down Expand Up @@ -395,6 +396,20 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_extractListElement(JNIEnv
CATCH_STD(env, 0);
}

/**
 * JNI entry point for ai.rapids.cudf.ColumnView.dropListDuplicates(long).
 *
 * Interprets the incoming handle as a cudf::column_view, wraps it in a
 * cudf::lists_column_view and runs cudf::lists::drop_list_duplicates on it.
 *
 * @param env         JNI environment, used for the null check, device setup and error reporting
 * @param column_view handle holding a `cudf::column_view const *`
 * @return the released `cudf::column *` of the result as a jlong; the error paths of
 *         JNI_NULL_CHECK / CATCH_STD are given 0 as their return value
 */
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dropListDuplicates(JNIEnv *env, jclass,
                                                                          jlong column_view) {
  JNI_NULL_CHECK(env, column_view, "column is null", 0);
  try {
    cudf::jni::auto_set_device(env);

    // The handle is a pointer to a column_view; view it as a lists column.
    auto const *input_view = reinterpret_cast<cudf::column_view const *>(column_view);
    cudf::lists_column_view lists_view(*input_view);

    // release() hands the raw column pointer back to the caller instead of freeing it here;
    // the Java caller wraps the returned handle in a ColumnVector.
    auto deduped = cudf::lists::drop_list_duplicates(lists_view);
    return reinterpret_cast<jlong>(deduped.release());
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContains(JNIEnv *env, jclass,
jlong column_view,
jlong lookup_key) {
Expand Down
22 changes: 22 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4193,6 +4193,28 @@ void testExtractListElements() {
}
}

/**
 * Checks {@code dropListDuplicates()} on a nullable list-of-INT32 column: rows without
 * repeated values and the null row are expected back unchanged, while rows with repeated
 * values (including repeated nulls) are expected to keep a single copy of each value.
 */
@Test
void testDropListDuplicates() {
  // Rows with no repeated values; they serve as both input and expected output.
  List<Integer> uniquePair = Arrays.asList(1, 2);
  List<Integer> uniqueTriple = Arrays.asList(3, 4, 5);

  // Rows with repeated values, each followed by the contents expected after
  // de-duplication and the sort applied below (the null comes last).
  List<Integer> withRepeats = Arrays.asList(null, 0, 6, 6, 0);
  List<Integer> expectedFromRepeats = Arrays.asList(0, 6, null);
  List<Integer> withRepeatedNulls = Arrays.asList(null, 6, 7, null, 7);
  List<Integer> expectedFromRepeatedNulls = Arrays.asList(6, 7, null);

  // A row that is itself null.
  List<Integer> nullRow = null;

  HostColumnVector.DataType nullableIntLists = new HostColumnVector.ListType(true,
      new HostColumnVector.BasicType(true, DType.INT32));

  try (ColumnVector input = ColumnVector.fromLists(nullableIntLists,
           uniquePair, uniqueTriple, withRepeats, withRepeatedNulls, nullRow);
       ColumnVector expected = ColumnVector.fromLists(nullableIntLists,
           uniquePair, uniqueTriple, expectedFromRepeats, expectedFromRepeatedNulls, nullRow);
       ColumnVector deduped = input.dropListDuplicates();
       // Dropping duplicates gives no guarantee about element order within a list, so each
       // list is sorted before comparing to keep the check deterministic.
       ColumnVector sorted = deduped.listSortRows(false, false)) {
    assertColumnsAreEqual(expected, sorted);
  }
}

@Test
void testListContainsString() {
List<String> list1 = Arrays.asList("Héllo there", "thésé");
Expand Down

0 comments on commit 794734c

Please sign in to comment.