Skip to content

Commit

Permalink
Merge branch 'branch-21.10' into bug-struct-scalar-copy-ctor
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Jul 30, 2021
2 parents fbfbe49 + 01b647a commit 3b5f477
Show file tree
Hide file tree
Showing 12 changed files with 632 additions and 79 deletions.
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Install dependencies"
gpuci_conda_retry install -y \
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
Expand All @@ -83,8 +83,8 @@ gpuci_conda_retry install -y \
"ucx-py=0.21.*"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry install -y "your-pkg=1.0.0"
# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


gpuci_logger "Check compiler versions"
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,9 @@ ConfigureTest(STRINGS_TEST

###################################################################################################
# - structs test ----------------------------------------------------------------------------------
ConfigureTest(STRUCTS_TEST structs/structs_column_tests.cu)
ConfigureTest(STRUCTS_TEST
structs/structs_column_tests.cpp
)

###################################################################################################
# - nvtext test -----------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists)
auto struct_col =
cudf::test::structs_column_wrapper{{names_col, lists_col}, {1, 1, 1, 1, 0, 0}}.release();

EXPECT_EQ(struct_col->size(), num_rows);
EXPECT_EQ(struct_col->view().child(0).size(), num_rows);
EXPECT_EQ(struct_col->view().child(1).size(), num_rows);

// Check that the last two rows are null for all members.

// For `Name` member, indices 4 and 5 are null.
Expand All @@ -200,15 +204,9 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists)
return i < 4;
})}.release();

cudf::test::expect_columns_equivalent(struct_col->view().child(0), expected_names_col->view());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(struct_col->view().child(0), expected_names_col->view());

// For the `List` member, indices 4, 5 should be null.
// FIXME: The way list columns are currently compared is not ideal for testing
// structs' list members. Rather than comparing for equivalence,
// column_comparator_impl<list_view> currently checks that list's data (child)
// and offsets match perfectly.
// This causes two "equivalent lists" to compare unequal, if the data columns
// have different values at an index where the value is null.
auto expected_last_two_lists_col = cudf::test::lists_column_wrapper<TypeParam, int32_t>{
{
{1, 2, 3},
Expand All @@ -218,14 +216,11 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists)
{7, 8}, // Null.
{9} // Null.
},
cudf::detail::make_counting_transform_iterator(0, [](auto i) {
return i == 0;
})}.release();
cudf::detail::make_counting_transform_iterator(
0, [](auto i) { return i < 4; })}.release();

// FIXME: Uncomment after list comparison is fixed.
// cudf::test::expect_columns_equivalent(
// struct_col->view().child(1),
// expected_last_two_lists_col->view());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(struct_col->view().child(1),
expected_last_two_lists_col->view());
}

TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs)
Expand Down Expand Up @@ -255,6 +250,10 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs)
auto struct_2 =
cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release();

EXPECT_EQ(struct_2->size(), num_rows);
EXPECT_EQ(struct_2->view().child(0).size(), num_rows);
EXPECT_EQ(struct_2->view().child(1).size(), num_rows);

// Verify that the child/grandchild columns are as expected.
auto expected_names_col =
cudf::test::strings_column_wrapper(
Expand Down Expand Up @@ -327,6 +326,10 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct
}
.release();

EXPECT_EQ(struct_2->size(), num_rows);
EXPECT_EQ(struct_2->view().child(0).size(), num_rows);
EXPECT_EQ(struct_2->view().child(1).size(), num_rows);

// Verify that the child/grandchild columns are as expected.

// Top-struct has 1 null (at index 0).
Expand Down Expand Up @@ -387,9 +390,9 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructsWithMembersWithDifferentRowCount

TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs)
{
// Test structs with two members:
// Test list containing structs with two members
// 1. Name: String
// 2. List: List<TypeParam>
// 2. Age: TypeParam

std::initializer_list<std::string> names = {"Samuel Vimes",
"Carrot Ironfoundersson",
Expand All @@ -398,7 +401,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs)
"Detritus",
"Mr Slant"};

auto num_rows{std::distance(names.begin(), names.end())};
auto num_struct_rows{std::distance(names.begin(), names.end())};

// `Name` column has all valid values.
auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()};
Expand All @@ -410,6 +413,9 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs)
auto struct_col =
cudf::test::structs_column_wrapper({names_col, ages_col}, {1, 1, 1, 0, 0, 1}).release();

EXPECT_EQ(struct_col->size(), num_struct_rows);
EXPECT_EQ(struct_col->view().child(0).size(), num_struct_rows);

auto expected_unchanged_struct_col = cudf::column(*struct_col);

auto list_offsets_column =
Expand Down
141 changes: 141 additions & 0 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import ai.rapids.cudf.HostColumnVector.ListType;
import ai.rapids.cudf.HostColumnVector.StructData;
import ai.rapids.cudf.HostColumnVector.StructType;
import ai.rapids.cudf.ast.CompiledExpression;

import java.io.File;
import java.math.BigDecimal;
Expand Down Expand Up @@ -523,6 +524,26 @@ private static native long[] leftAntiJoin(long leftTable, int[] leftJoinCols, lo
private static native long[] leftAntiJoinGatherMap(long leftKeys, long rightKeys,
boolean compareNullsEqual) throws CudfException;

// Native entry points for the conditional (expression-based) joins.
// Each one is called from the public overload of the same join type with:
//   leftTable / rightTable - the native views of the two tables (getNativeView()),
//   condition              - the native handle of a CompiledExpression (getNativeHandle()),
//   compareNullsEqual      - forwarded unchanged from the public API.
// The returned long[] is handed directly to buildJoinGatherMaps (left/inner/full) or
// buildSemiJoinGatherMap (left semi/left anti) to construct the GatherMap result(s).
// NOTE(review): the exact array layout is defined by the native side; buildSemiJoinGatherMap
// reads element 0 as a buffer size and element 1 as an address -- confirm for the two-map form.

// Conditional left join: result is consumed by buildJoinGatherMaps.
private static native long[] conditionalLeftJoinGatherMaps(long leftTable, long rightTable,
long condition,
boolean compareNullsEqual) throws CudfException;

// Conditional inner join: result is consumed by buildJoinGatherMaps.
private static native long[] conditionalInnerJoinGatherMaps(long leftTable, long rightTable,
long condition,
boolean compareNullsEqual) throws CudfException;

// Conditional full join: result is consumed by buildJoinGatherMaps.
private static native long[] conditionalFullJoinGatherMaps(long leftTable, long rightTable,
long condition,
boolean compareNullsEqual) throws CudfException;

// Conditional left semi join: result is consumed by buildSemiJoinGatherMap.
private static native long[] conditionalLeftSemiJoinGatherMap(long leftTable, long rightTable,
long condition,
boolean compareNullsEqual) throws CudfException;

// Conditional left anti join: result is consumed by buildSemiJoinGatherMap.
private static native long[] conditionalLeftAntiJoinGatherMap(long leftTable, long rightTable,
long condition,
boolean compareNullsEqual) throws CudfException;

private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException;

private static native long[] concatenate(long[] cudfTablePointers) throws CudfException;
Expand Down Expand Up @@ -1969,6 +1990,30 @@ public GatherMap[] leftJoinGatherMaps(Table rightKeys, boolean compareNullsEqual
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Computes the gather maps that can be used to manifest the result of a left join between
 * two tables when a conditional expression is true. It is assumed this table instance holds
 * the columns from the left table, and the table argument represents the columns from the
 * right table. Two {@link GatherMap} instances will be returned that can be used to gather
 * the left and right tables, respectively, to produce the result of the left join.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * <p>
 * Unlike the equi-join overload, the tables passed here are not key tables, so they are
 * not required to have the same number of columns.
 * @param rightTable the right side table of the join
 * @param condition conditional expression to evaluate during the join
 * @param compareNullsEqual true if null key values should match otherwise false
 * @return left and right table gather maps
 */
public GatherMap[] leftJoinGatherMaps(Table rightTable, CompiledExpression condition,
                                      boolean compareNullsEqual) {
  // Deliberately no column-count comparison: that precondition only applies to equi-joins,
  // where both tables hold matching key columns. Here rows are matched by the condition.
  long[] gatherMapData =
      conditionalLeftJoinGatherMaps(getNativeView(), rightTable.getNativeView(),
          condition.getNativeHandle(), compareNullsEqual);
  return buildJoinGatherMaps(gatherMapData);
}

/**
* Computes the gather maps that can be used to manifest the result of an inner equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand All @@ -1990,6 +2035,30 @@ public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqua
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Computes the gather maps that can be used to manifest the result of an inner join between
 * two tables when a conditional expression is true. It is assumed this table instance holds
 * the columns from the left table, and the table argument represents the columns from the
 * right table. Two {@link GatherMap} instances will be returned that can be used to gather
 * the left and right tables, respectively, to produce the result of the inner join.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * <p>
 * Unlike the equi-join overload, the tables passed here are not key tables, so they are
 * not required to have the same number of columns.
 * @param rightTable the right side table of the join
 * @param condition conditional expression to evaluate during the join
 * @param compareNullsEqual true if null key values should match otherwise false
 * @return left and right table gather maps
 */
public GatherMap[] innerJoinGatherMaps(Table rightTable, CompiledExpression condition,
                                       boolean compareNullsEqual) {
  // Deliberately no column-count comparison: that precondition only applies to equi-joins,
  // where both tables hold matching key columns. Here rows are matched by the condition.
  long[] gatherMapData =
      conditionalInnerJoinGatherMaps(getNativeView(), rightTable.getNativeView(),
          condition.getNativeHandle(), compareNullsEqual);
  return buildJoinGatherMaps(gatherMapData);
}

/**
* Computes the gather maps that can be used to manifest the result of a full equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand All @@ -2011,6 +2080,30 @@ public GatherMap[] fullJoinGatherMaps(Table rightKeys, boolean compareNullsEqual
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Computes the gather maps that can be used to manifest the result of a full join between
 * two tables when a conditional expression is true. It is assumed this table instance holds
 * the columns from the left table, and the table argument represents the columns from the
 * right table. Two {@link GatherMap} instances will be returned that can be used to gather
 * the left and right tables, respectively, to produce the result of the full join.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * <p>
 * Unlike the equi-join overload, the tables passed here are not key tables, so they are
 * not required to have the same number of columns.
 * @param rightTable the right side table of the join
 * @param condition conditional expression to evaluate during the join
 * @param compareNullsEqual true if null key values should match otherwise false
 * @return left and right table gather maps
 */
public GatherMap[] fullJoinGatherMaps(Table rightTable, CompiledExpression condition,
                                      boolean compareNullsEqual) {
  // Deliberately no column-count comparison: that precondition only applies to equi-joins,
  // where both tables hold matching key columns. Here rows are matched by the condition.
  long[] gatherMapData =
      conditionalFullJoinGatherMaps(getNativeView(), rightTable.getNativeView(),
          condition.getNativeHandle(), compareNullsEqual);
  return buildJoinGatherMaps(gatherMapData);
}

private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
Expand Down Expand Up @@ -2039,6 +2132,30 @@ public GatherMap leftSemiJoinGatherMap(Table rightKeys, boolean compareNullsEqua
return buildSemiJoinGatherMap(gatherMapData);
}

/**
 * Computes the gather map that can be used to manifest the result of a left semi join between
 * two tables when a conditional expression is true. It is assumed this table instance holds
 * the columns from the left table, and the table argument represents the columns from the
 * right table. The {@link GatherMap} instance returned can be used to gather the left table
 * to produce the result of the left semi join.
 * It is the responsibility of the caller to close the resulting gather map instance.
 * <p>
 * Unlike the equi-join overload, the tables passed here are not key tables, so they are
 * not required to have the same number of columns.
 * @param rightTable the right side table of the join
 * @param condition conditional expression to evaluate during the join
 * @param compareNullsEqual true if null key values should match otherwise false
 * @return left table gather map
 */
public GatherMap leftSemiJoinGatherMap(Table rightTable, CompiledExpression condition,
                                       boolean compareNullsEqual) {
  // Deliberately no column-count comparison: that precondition only applies to equi-joins,
  // where both tables hold matching key columns. Here rows are matched by the condition.
  long[] gatherMapData =
      conditionalLeftSemiJoinGatherMap(getNativeView(), rightTable.getNativeView(),
          condition.getNativeHandle(), compareNullsEqual);
  return buildSemiJoinGatherMap(gatherMapData);
}

/**
* Computes the gather map that can be used to manifest the result of a left anti-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand All @@ -2060,6 +2177,30 @@ public GatherMap leftAntiJoinGatherMap(Table rightKeys, boolean compareNullsEqua
return buildSemiJoinGatherMap(gatherMapData);
}

/**
 * Computes the gather map that can be used to manifest the result of a left anti join between
 * two tables when a conditional expression is true. It is assumed this table instance holds
 * the columns from the left table, and the table argument represents the columns from the
 * right table. The {@link GatherMap} instance returned can be used to gather the left table
 * to produce the result of the left anti join.
 * It is the responsibility of the caller to close the resulting gather map instance.
 * <p>
 * Unlike the equi-join overload, the tables passed here are not key tables, so they are
 * not required to have the same number of columns.
 * @param rightTable the right side table of the join
 * @param condition conditional expression to evaluate during the join
 * @param compareNullsEqual true if null key values should match otherwise false
 * @return left table gather map
 */
public GatherMap leftAntiJoinGatherMap(Table rightTable, CompiledExpression condition,
                                       boolean compareNullsEqual) {
  // Deliberately no column-count comparison: that precondition only applies to equi-joins,
  // where both tables hold matching key columns. Here rows are matched by the condition.
  long[] gatherMapData =
      conditionalLeftAntiJoinGatherMap(getNativeView(), rightTable.getNativeView(),
          condition.getNativeHandle(), compareNullsEqual);
  return buildSemiJoinGatherMap(gatherMapData);
}

/**
* Convert this table of columns into a row major format that is useful for interacting with other
* systems that do row major processing of the data. Currently only fixed-width column types are
Expand Down
5 changes: 5 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ast/CompiledExpression.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ public synchronized void close() {
isClosed = true;
}

/**
 * Returns the native address of a compiled expression.
 * Intended for internal cudf use only.
 */
public long getNativeHandle() {
  final long handle = cleaner.nativeHandle;
  return handle;
}

private static native long compile(byte[] serializedExpression);
private static native long computeColumn(long astHandle, long tableHandle);
private static native void destroy(long handle);
Expand Down
54 changes: 1 addition & 53 deletions java/src/main/native/src/CompiledExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,59 +26,7 @@
#include <cudf/types.hpp>

#include "cudf_jni_apis.hpp"

namespace cudf {
namespace jni {
namespace ast {

/**
 * Owns every resource that belongs to one compiled AST expression.
 *
 * An AST node does not own its children, so each node of the expression tree has to be
 * kept alive (and eventually freed) by this object, one container per node type.
 *
 * Once the libcudf AST refactoring in https://github.com/rapidsai/cudf/pull/8815 adds a
 * virtual destructor to the base AST node type, the per-type tracking below can be
 * simplified.
 */
class compiled_expr {
public:
  /**
   * Stores a literal node together with the scalar supplied for it and returns a
   * reference to the stored literal node.
   */
  cudf::ast::literal &add_literal(std::unique_ptr<cudf::ast::literal> literal_ptr,
                                  std::unique_ptr<cudf::scalar> scalar_ptr) {
    // Grab the address first; it stays valid after ownership moves into the vector.
    cudf::ast::literal *const stored = literal_ptr.get();
    literals.push_back(std::move(literal_ptr));
    scalars.push_back(std::move(scalar_ptr));
    return *stored;
  }

  /** Stores a column reference node and returns a reference to the stored node. */
  cudf::ast::column_reference &
  add_column_ref(std::unique_ptr<cudf::ast::column_reference> ref_ptr) {
    cudf::ast::column_reference *const stored = ref_ptr.get();
    column_refs.push_back(std::move(ref_ptr));
    return *stored;
  }

  /** Stores an expression node and returns a reference to the stored node. */
  cudf::ast::expression &add_expression(std::unique_ptr<cudf::ast::expression> expr_ptr) {
    cudf::ast::expression *const stored = expr_ptr.get();
    expressions.push_back(std::move(expr_ptr));
    return *stored;
  }

  /** Returns the most recently added expression node, i.e. the top of the tree. */
  cudf::ast::expression &get_top_expression() const {
    auto const &top = expressions.back();
    return *top;
  }

private:
  /** Literal nodes of the expression tree */
  std::vector<std::unique_ptr<cudf::ast::literal>> literals;

  /** Column reference nodes of the expression tree */
  std::vector<std::unique_ptr<cudf::ast::column_reference>> column_refs;

  /** Expression nodes of the expression tree */
  std::vector<std::unique_ptr<cudf::ast::expression>> expressions;

  /** GPU scalars that back the literal nodes */
  std::vector<std::unique_ptr<cudf::scalar>> scalars;
};

} // namespace ast
} // namespace jni
} // namespace cudf
#include "jni_compiled_expr.hpp"

namespace {

Expand Down
Loading

0 comments on commit 3b5f477

Please sign in to comment.