Skip to content

Commit

Permalink
Java bindings for mixed left, inner, and full joins (#9941)
Browse files Browse the repository at this point in the history
Depends on #9917.  Adds Java bindings for the libcudf mixed join APIs.  A new MixedJoinSize class was added to track the size information returned for mixed joins.

Authors:
  - Jason Lowe (https://github.com/jlowe)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: #9941
  • Loading branch information
jlowe authored Jan 19, 2022
1 parent f193d59 commit e49084e
Show file tree
Hide file tree
Showing 4 changed files with 788 additions and 1 deletion.
43 changes: 43 additions & 0 deletions java/src/main/java/ai/rapids/cudf/MixedJoinSize.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf;

/**
 * Tracks size information associated with a mixed table join.
 * <p>
 * An instance holds the number of rows the join would produce together with a
 * {@link ColumnVector} of match information. The instance closes that column when it is
 * closed itself, so callers must close it once the size information is no longer needed.
 */
public final class MixedJoinSize implements AutoCloseable {
  private final long outputRowCount;
  // This is in flux, avoid exposing publicly until the dust settles.
  private final ColumnVector matches;
  // Guarded by this object's monitor; ensures the matches column is closed at most once.
  private boolean closed = false;

  /**
   * @param outputRowCount number of rows the mixed join would generate
   * @param matches column of match information; closed when this object is closed
   */
  MixedJoinSize(long outputRowCount, ColumnVector matches) {
    this.outputRowCount = outputRowCount;
    this.matches = matches;
  }

  /** Return the number of output rows that would be generated from the mixed join */
  public long getOutputRowCount() {
    return outputRowCount;
  }

  /**
   * Return the column of match information. The returned column is still owned by this
   * object, so callers must not use it after this object has been closed.
   */
  ColumnVector getMatches() {
    return matches;
  }

  /**
   * Closes the matches column. Calling this more than once is harmless: only the first call
   * forwards to the underlying column, as recommended for {@link AutoCloseable#close()}.
   */
  @Override
  public synchronized void close() {
    if (closed) {
      return;
    }
    // Mark as closed before delegating so a failing close is never attempted a second time.
    closed = true;
    matches.close();
  }
}
235 changes: 234 additions & 1 deletion java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,36 @@ private static native long[] conditionalLeftAntiJoinGatherMapWithCount(long left
long condition,
long rowCount) throws CudfException;

/**
 * Native implementation behind the public {@code mixedLeftJoinSize} overload.
 * The public wrapper passes the native views of the key and conditional tables, the native
 * handle of the compiled condition, and {@code true} when nulls should compare as equal.
 * @return a two-element array: the output row count at index 0 and a native handle for the
 *         matches column at index 1 (the wrapper asserts the length and wraps the handle
 *         in a ColumnVector)
 */
private static native long[] mixedLeftJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native implementation behind the public {@code mixedLeftJoinGatherMaps} overload that
 * takes no precomputed size. The public wrapper passes the native views of the key and
 * conditional tables, the native handle of the compiled condition, and {@code true} when
 * nulls should compare as equal.
 * @return raw gather map data that the wrapper hands to {@code buildJoinGatherMaps}; that
 *         helper reads a buffer size at index 0 followed by the left map's address and handle
 */
private static native long[] mixedLeftJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native implementation behind the public {@code mixedLeftJoinGatherMaps} overload that
 * accepts a previously computed {@code MixedJoinSize}. In addition to the table views,
 * condition handle and null-equality flag, the wrapper passes the size object's output row
 * count as {@code outputRowCount} and the native view of its matches column as
 * {@code matchesColumnView}.
 * @return raw gather map data that the wrapper hands to {@code buildJoinGatherMaps}; that
 *         helper reads a buffer size at index 0 followed by the left map's address and handle
 */
private static native long[] mixedLeftJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

/**
 * Native implementation behind the public {@code mixedInnerJoinSize} overload.
 * The public wrapper passes the native views of the key and conditional tables, the native
 * handle of the compiled condition, and {@code true} when nulls should compare as equal.
 * @return a two-element array: the output row count at index 0 and a native handle for the
 *         matches column at index 1 (the wrapper asserts the length and wraps the handle
 *         in a ColumnVector)
 */
private static native long[] mixedInnerJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native implementation behind the public {@code mixedInnerJoinGatherMaps} overload that
 * takes no precomputed size. The public wrapper passes the native views of the key and
 * conditional tables, the native handle of the compiled condition, and {@code true} when
 * nulls should compare as equal.
 * @return raw gather map data that the wrapper hands to {@code buildJoinGatherMaps}; that
 *         helper reads a buffer size at index 0 followed by the left map's address and handle
 */
private static native long[] mixedInnerJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native implementation behind the public {@code mixedInnerJoinGatherMaps} overload that
 * accepts a previously computed {@code MixedJoinSize}. In addition to the table views,
 * condition handle and null-equality flag, the wrapper passes the size object's output row
 * count as {@code outputRowCount} and the native view of its matches column as
 * {@code matchesColumnView}.
 * @return raw gather map data that the wrapper hands to {@code buildJoinGatherMaps}; that
 *         helper reads a buffer size at index 0 followed by the left map's address and handle
 */
private static native long[] mixedInnerJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

/**
 * Native implementation behind the public {@code mixedFullJoinGatherMaps} method.
 * The public wrapper passes the native views of the key and conditional tables, the native
 * handle of the compiled condition, and {@code true} when nulls should compare as equal.
 * @return raw gather map data that the wrapper hands to {@code buildJoinGatherMaps}; that
 *         helper reads a buffer size at index 0 followed by the left map's address and handle
 */
private static native long[] mixedFullJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native cross join entry point taking two table arguments as {@code long} values and
 * declared to throw {@link CudfException}.
 * NOTE(review): the arguments are presumably native table view handles and the result
 * presumably holds native column handles for the joined table; no caller is visible in
 * this part of the file, so confirm against the public wrapper.
 */
private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException;

/**
 * Native concatenation entry point taking an array of table pointers and declared to throw
 * {@link CudfException}.
 * NOTE(review): the result presumably holds native column handles for the concatenated
 * table; no caller is visible in this part of the file, so confirm against the public wrapper.
 */
private static native long[] concatenate(long[] cudfTablePointers) throws CudfException;
Expand Down Expand Up @@ -2221,7 +2251,7 @@ public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target
target.getNativeView(), checkBounds));
}

private GatherMap[] buildJoinGatherMaps(long[] gatherMapData) {
private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
long leftHandle = gatherMapData[2];
Expand Down Expand Up @@ -2374,6 +2404,94 @@ public GatherMap[] conditionalLeftJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Calculates the output size information of a mixed left join, i.e. a left join whose
 * condition is the logical AND of an equality condition on key columns and an inequality
 * condition evaluated on separate conditional columns.
 * NOTE: The returned size information object must be closed by the caller, otherwise
 * native resources can be leaked!
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return size information for the join
 */
public static MixedJoinSize mixedLeftJoinSize(Table leftKeys, Table rightKeys,
                                              Table leftConditional, Table rightConditional,
                                              CompiledExpression condition,
                                              NullEquality nullEquality) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;

  long[] sizeInfo = mixedLeftJoinSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual);
  // Native side reports exactly two values: the row count and the matches column handle.
  assert sizeInfo.length == 2;
  ColumnVector matchesColumn = new ColumnVector(sizeInfo[1]);
  return new MixedJoinSize(sizeInfo[0], matchesColumn);
}

/**
 * Builds the gather maps for a mixed left join, i.e. a left join whose condition is the
 * logical AND of an equality condition on key columns and an inequality condition
 * evaluated on separate conditional columns.
 * The result contains two {@link GatherMap} instances, one for gathering the left table
 * and one for gathering the right table, which together produce the left join output.
 * The caller is responsible for closing the returned gather map instances.
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;

  return buildJoinGatherMaps(mixedLeftJoinGatherMaps(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

/**
 * Builds the gather maps for a mixed left join, i.e. a left join whose condition is the
 * logical AND of an equality condition on key columns and an inequality condition
 * evaluated on separate conditional columns.
 * The result contains two {@link GatherMap} instances, one for gathering the left table
 * and one for gathering the right table, which together produce the left join output.
 * The caller is responsible for closing the returned gather map instances.
 * Use this overload when the output size has already been computed by
 * {@link #mixedLeftJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}
 * so that the size result can be passed along.
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @param joinSize mixed join size result
 * @return left and right table gather maps
 */
public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality,
                                                  MixedJoinSize joinSize) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;
  // Values carried over from the earlier size computation.
  final long knownRowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildJoinGatherMaps(mixedLeftJoinGatherMapsWithSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual,
      knownRowCount, matchesView));
}

/**
* Computes the gather maps that can be used to manifest the result of an inner equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand Down Expand Up @@ -2514,6 +2632,94 @@ public GatherMap[] conditionalInnerJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Calculates the output size information of a mixed inner join, i.e. an inner join whose
 * condition is the logical AND of an equality condition on key columns and an inequality
 * condition evaluated on separate conditional columns.
 * NOTE: The returned size information object must be closed by the caller, otherwise
 * native resources can be leaked!
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return size information for the join
 */
public static MixedJoinSize mixedInnerJoinSize(Table leftKeys, Table rightKeys,
                                               Table leftConditional, Table rightConditional,
                                               CompiledExpression condition,
                                               NullEquality nullEquality) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;

  long[] sizeInfo = mixedInnerJoinSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual);
  // Native side reports exactly two values: the row count and the matches column handle.
  assert sizeInfo.length == 2;
  ColumnVector matchesColumn = new ColumnVector(sizeInfo[1]);
  return new MixedJoinSize(sizeInfo[0], matchesColumn);
}

/**
 * Builds the gather maps for a mixed inner join, i.e. an inner join whose condition is the
 * logical AND of an equality condition on key columns and an inequality condition
 * evaluated on separate conditional columns.
 * The result contains two {@link GatherMap} instances, one for gathering the left table
 * and one for gathering the right table, which together produce the inner join output.
 * The caller is responsible for closing the returned gather map instances.
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;

  return buildJoinGatherMaps(mixedInnerJoinGatherMaps(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

/**
 * Builds the gather maps for a mixed inner join, i.e. an inner join whose condition is the
 * logical AND of an equality condition on key columns and an inequality condition
 * evaluated on separate conditional columns.
 * The result contains two {@link GatherMap} instances, one for gathering the left table
 * and one for gathering the right table, which together produce the inner join output.
 * The caller is responsible for closing the returned gather map instances.
 * Use this overload when the output size has already been computed by
 * {@link #mixedInnerJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}
 * so that the size result can be passed along.
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @param joinSize mixed join size result
 * @return left and right table gather maps
 */
public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality,
                                                   MixedJoinSize joinSize) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;
  // Values carried over from the earlier size computation.
  final long knownRowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildJoinGatherMaps(mixedInnerJoinGatherMapsWithSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual,
      knownRowCount, matchesView));
}

/**
* Computes the gather maps that can be used to manifest the result of a full equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand Down Expand Up @@ -2620,6 +2826,33 @@ public GatherMap[] conditionalFullJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Builds the gather maps for a mixed full join, i.e. a full join whose condition is the
 * logical AND of an equality condition on key columns and an inequality condition
 * evaluated on separate conditional columns.
 * The result contains two {@link GatherMap} instances, one for gathering the left table
 * and one for gathering the right table, which together produce the full join output.
 * The caller is responsible for closing the returned gather map instances.
 * @param leftKeys key columns of the left table used by the equality condition
 * @param rightKeys key columns of the right table used by the equality condition
 * @param leftConditional left table columns required to evaluate the inequality condition
 * @param rightConditional right table columns required to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedFullJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  // Resolve the native arguments in the same left-to-right order as the native signature.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = NullEquality.EQUAL == nullEquality;

  return buildJoinGatherMaps(mixedFullJoinGatherMaps(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
Expand Down
Loading

0 comments on commit e49084e

Please sign in to comment.