Skip to content

Commit

Permalink
JNI support for Collect Ops in Reduction (#10427)
Browse files Browse the repository at this point in the history
Exposes public APIs for the collect operations (`collectList`, `collectSet`, `mergeLists`, `mergeSets`) on `ReductionAggregation`, which are essential to spark-rapids. This PR also extends the Reduction test framework to distinguish output types from input types.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: #10427
  • Loading branch information
sperlingxx authored Mar 15, 2022
1 parent 228cc79 commit 4596244
Show file tree
Hide file tree
Showing 4 changed files with 411 additions and 139 deletions.
11 changes: 11 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,17 @@ public static ColumnVector emptyStructs(HostColumnVector.DataType dataType, long
}
}

/**
 * Create a new BOOL8 vector from the given boolean values.
 * Each {@code true} is encoded as the byte 1 and each {@code false} as the byte 0
 * before the bytes are handed to the builder.
 *
 * @param values the booleans to copy into the new vector, in order
 * @return the vector returned by {@code build} for the encoded values
 */
public static ColumnVector fromBooleans(boolean... values) {
  final int rowCount = values.length;
  // A fresh byte[] is zero-filled, so only the true entries need to be written.
  final byte[] encoded = new byte[rowCount];
  int idx = 0;
  for (boolean flag : values) {
    if (flag) {
      encoded[idx] = 1;
    }
    idx++;
  }
  return build(DType.BOOL8, rowCount, builder -> builder.appendArray(encoded));
}

/**
* Create a new vector from the given values.
*/
Expand Down
4 changes: 2 additions & 2 deletions java/src/main/java/ai/rapids/cudf/GroupByAggregation.java
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ public static GroupByAggregation collectList(NullPolicy nullPolicy) {
}

/**
* Collect the values into a set. All null values will be excluded, and all nan values are regarded as
* Collect the values into a set. All null values will be excluded, and all NaN values are regarded as
* unique instances.
*/
public static GroupByAggregation collectSet() {
Expand All @@ -270,7 +270,7 @@ public static GroupByAggregation mergeLists() {
}

/**
* Merge the partial sets produced by multiple CollectSetAggregations. Each null/nan value will be regarded as
* Merge the partial sets produced by multiple CollectSetAggregations. Each null/NaN value will be regarded as
* a unique instance.
*/
public static GroupByAggregation mergeSets() {
Expand Down
65 changes: 64 additions & 1 deletion java/src/main/java/ai/rapids/cudf/ReductionAggregation.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -209,4 +209,67 @@ public static ReductionAggregation nth(int offset) {
public static ReductionAggregation nth(int offset, NullPolicy nullPolicy) {
  // Build the underlying nth aggregation first, then wrap it as a reduction.
  final Aggregation delegate = Aggregation.nth(offset, nullPolicy);
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that gathers the column's values into a list; null values are skipped.
 * Wraps {@code Aggregation.collectList()}.
 *
 * @return a reduction wrapping the collect-list aggregation
 */
public static ReductionAggregation collectList() {
  final Aggregation delegate = Aggregation.collectList();
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that gathers the column's values into a list.
 * Wraps {@code Aggregation.collectList(NullPolicy)}.
 *
 * @param nullPolicy Indicates whether to include/exclude nulls during collection.
 * @return a reduction wrapping the collect-list aggregation
 */
public static ReductionAggregation collectList(NullPolicy nullPolicy) {
  final Aggregation delegate = Aggregation.collectList(nullPolicy);
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that gathers the column's values into a set. All null values are excluded,
 * and every NaN value is regarded as a unique instance.
 * Wraps {@code Aggregation.collectSet()}.
 *
 * @return a reduction wrapping the collect-set aggregation
 */
public static ReductionAggregation collectSet() {
  final Aggregation delegate = Aggregation.collectSet();
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that gathers the column's values into a set, with explicit control over how
 * nulls and NaNs are treated.
 * Wraps {@code Aggregation.collectSet(NullPolicy, NullEquality, NaNEquality)}.
 *
 * @param nullPolicy   Indicates whether to include/exclude nulls during collection.
 * @param nullEquality Flag to specify whether null entries within each list should be
 *                     considered equal.
 * @param nanEquality  Flag to specify whether NaN values in floating point column should be
 *                     considered equal.
 * @return a reduction wrapping the collect-set aggregation
 */
public static ReductionAggregation collectSet(NullPolicy nullPolicy,
    NullEquality nullEquality, NaNEquality nanEquality) {
  final Aggregation delegate =
      Aggregation.collectSet(nullPolicy, nullEquality, nanEquality);
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that merges the partial lists produced by multiple CollectListAggregations.
 * Wraps {@code Aggregation.mergeLists()}.
 * <p>
 * NOTICE: The partial lists to be merged should NOT include any null list element
 * (but can include null list entries).
 *
 * @return a reduction wrapping the merge-lists aggregation
 */
public static ReductionAggregation mergeLists() {
  final Aggregation delegate = Aggregation.mergeLists();
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that merges the partial sets produced by multiple CollectSetAggregations.
 * Each null/NaN value is regarded as a unique instance.
 * Wraps {@code Aggregation.mergeSets()}.
 *
 * @return a reduction wrapping the merge-sets aggregation
 */
public static ReductionAggregation mergeSets() {
  final Aggregation delegate = Aggregation.mergeSets();
  return new ReductionAggregation(delegate);
}

/**
 * Reduction that merges the partial sets produced by multiple CollectSetAggregations,
 * with explicit control over how nulls and NaNs are compared.
 * Wraps {@code Aggregation.mergeSets(NullEquality, NaNEquality)}.
 *
 * @param nullEquality Flag to specify whether null entries within each list should be
 *                     considered equal.
 * @param nanEquality  Flag to specify whether NaN values in floating point column should be
 *                     considered equal.
 * @return a reduction wrapping the merge-sets aggregation
 */
public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEquality nanEquality) {
  final Aggregation delegate = Aggregation.mergeSets(nullEquality, nanEquality);
  return new ReductionAggregation(delegate);
}

}
Loading

0 comments on commit 4596244

Please sign in to comment.