Skip to content

Commit

Permalink
Merge branch 'branch-22.02' into bug/orc_dont_take_temp_by_ref
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard committed Jan 18, 2022
2 parents 539a422 + 7ff5f12 commit a433e4a
Show file tree
Hide file tree
Showing 20 changed files with 763 additions and 193 deletions.
66 changes: 52 additions & 14 deletions cpp/src/search/search.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -173,11 +173,56 @@ bool contains_scalar_dispatch::operator()<cudf::list_view>(column_view const&,
}

/**
 * @brief Specialization of the scalar-contains dispatch for structs columns: reports whether any
 * row of `col` equals the struct held by `value`.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the previous declaration
 * (unnamed parameters, `CUDF_FAIL` body) appears interleaved with the new implementation.
 *
 * The implementation checks, via `CUDF_EXPECTS`, that the column and scalar types match, that both
 * have the same number of children, and that each pair of children has the same type. It then
 * flattens both inputs and searches for a row index of the column that compares equal to row 0 of
 * the flattened scalar, with nulls compared as equal (`null_equality::EQUAL`).
 *
 * @param col    The structs column to search in.
 * @param value  The scalar to look for; it is accessed as a `struct_scalar`.
 * @param stream CUDA stream passed to the device-view creation and the `thrust::find_if` policy.
 * @return `true` if a matching row index was found, `false` otherwise.
 */
template <>
bool contains_scalar_dispatch::operator()<cudf::struct_view>(column_view const&,
scalar const&,
rmm::cuda_stream_view)
bool contains_scalar_dispatch::operator()<cudf::struct_view>(column_view const& col,
scalar const& value,
rmm::cuda_stream_view stream)
{
CUDF_FAIL("struct_view type not supported yet");
CUDF_EXPECTS(col.type() == value.type(), "scalar and column types must match");

// Table-like view of the scalar's members; it is compared child-by-child against `col` below.
auto const scalar_table = static_cast<struct_scalar const*>(&value)->view();
CUDF_EXPECTS(col.num_children() == scalar_table.num_columns(),
"struct scalar and structs column must have the same number of children");
for (size_type i = 0; i < col.num_children(); ++i) {
CUDF_EXPECTS(col.child(i).type() == scalar_table.column(i).type(),
"scalar and column children types must match");
}

// Prepare to flatten the structs column and scalar.
// Nulls are looked for in the column's children and in the scalar's members; the same
// nullability mode is then used for both flatten calls.
auto const has_null_elements =
has_nested_nulls(table_view{std::vector<column_view>{col.child_begin(), col.child_end()}}) ||
has_nested_nulls(scalar_table);
auto const flatten_nullability = has_null_elements
? structs::detail::column_nullability::FORCE
: structs::detail::column_nullability::MATCH_INCOMING;

// Flatten the input structs column, only materialize the bitmask if there is null in the input.
auto const col_flattened =
structs::detail::flatten_nested_columns(table_view{{col}}, {}, {}, flatten_nullability);
auto const val_flattened =
structs::detail::flatten_nested_columns(scalar_table, {}, {}, flatten_nullability);

// The struct scalar only contains the struct member columns.
// Thus, if there is any null in the input, we must exclude the first column in the flattened
// table of the input column from searching because that column is the materialized bitmask of
// the input structs column.
auto const col_flattened_content = col_flattened.flattened_columns();
// `has_null_elements` converted to an integer (0 or 1) is the number of leading columns skipped.
auto const col_flattened_children = table_view{std::vector<column_view>{
col_flattened_content.begin() + static_cast<size_type>(has_null_elements),
col_flattened_content.end()}};

auto const d_col_children_ptr = table_device_view::create(col_flattened_children, stream);
auto const d_val_ptr = table_device_view::create(val_flattened, stream);

// Search over the row indices [0, col.size()) of the column.
auto const start_iter = thrust::make_counting_iterator<size_type>(0);
auto const end_iter = start_iter + col.size();
auto const comp = row_equality_comparator(
nullate::DYNAMIC{has_null_elements}, *d_col_children_ptr, *d_val_ptr, null_equality::EQUAL);
auto const found_iter = thrust::find_if(
rmm::exec_policy(stream), start_iter, end_iter, [comp] __device__(auto const idx) {
return comp(idx, 0); // compare col[idx] == val[0].
});

// `find_if` returns `end_iter` when no index satisfied the predicate.
return found_iter != end_iter;
}

template <>
Expand All @@ -203,7 +248,6 @@ namespace detail {
bool contains(column_view const& col, scalar const& value, rmm::cuda_stream_view stream)
{
if (col.is_empty()) { return false; }

if (not value.is_valid(stream)) { return col.has_nulls(); }

return cudf::type_dispatcher(col.type(), contains_scalar_dispatch{}, col, value, stream);
Expand Down Expand Up @@ -264,20 +308,14 @@ struct multi_contains_dispatch {

// Specialization of the multi-contains dispatch for `list_view`: the body unconditionally invokes
// `CUDF_FAIL`, so none of the parameters are used.
// NOTE(review): rendered diff — the earlier parameter list (named `haystack`, `needles`, `stream`,
// `mr`) is shown interleaved with the later, unnamed one.
template <>
std::unique_ptr<column> multi_contains_dispatch::operator()<list_view>(
column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
column_view const&, column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
CUDF_FAIL("list_view type not supported");
}

// Specialization of the multi-contains dispatch for `struct_view`: the body unconditionally
// invokes `CUDF_FAIL`, so none of the parameters are used.
// NOTE(review): rendered diff — the earlier parameter list (named `haystack`, `needles`, `stream`,
// `mr`) is shown interleaved with the later, unnamed one.
template <>
std::unique_ptr<column> multi_contains_dispatch::operator()<struct_view>(
column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
column_view const&, column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
CUDF_FAIL("struct_view type not supported");
}
Expand Down
241 changes: 236 additions & 5 deletions cpp/tests/search/search_struct_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
#include <cudf_test/type_lists.hpp>

#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/search.hpp>
#include <cudf/table/table_view.hpp>

Expand All @@ -35,15 +36,14 @@ constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_leve
constexpr int32_t null{0}; // Mark for null child elements
constexpr int32_t XXX{0}; // Mark for null struct elements

// NOTE(review): rendered diff — the fixture below appears twice because it is shown both at its
// earlier position (before `TestTypes`) and at its later position (after `TestTypes`).
template <typename T>
struct TypedStructSearchTest : public cudf::test::BaseFixture {
};

// Type list used to instantiate the typed test suites in this file: the concatenation of the
// non-bool integral, floating point, duration and timestamp type lists.
using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
cudf::test::FloatingPointTypes,
cudf::test::DurationTypes,
cudf::test::TimestampTypes>;

// Typed fixture for the struct search tests, registered for every type in `TestTypes`.
template <typename T>
struct TypedStructSearchTest : public cudf::test::BaseFixture {
};
TYPED_TEST_SUITE(TypedStructSearchTest, TestTypes);

namespace {
Expand Down Expand Up @@ -353,3 +353,234 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), verbosity);
}

// Typed fixture for the tests that call `cudf::contains` with a structs column and a struct
// scalar; registered for every type in `TestTypes`.
template <typename T>
struct TypedScalarStructContainTest : cudf::test::BaseFixture {};
TYPED_TEST_SUITE(TypedScalarStructContainTest, TestTypes);

// Searching a structs column built from a default-constructed (element-less) child is expected to
// report "not found".
TYPED_TEST(TypedScalarStructContainTest, EmptyInputTest)
{
  using col_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;

  // Haystack: a structs column whose only child has no elements.
  auto empty_child    = col_wrapper{};
  auto const haystack = structs_col{{empty_child}};

  // Needle: a one-member struct scalar holding the value 1.
  auto needle_child = col_wrapper{1};
  auto const needle = cudf::struct_scalar(std::vector<cudf::column_view>{needle_child});

  EXPECT_EQ(false, cudf::contains(haystack, needle));
}

// Null-free input: a scalar equal to one of the rows is expected to be found, while a scalar that
// differs from that row in a single member is not.
TYPED_TEST(TypedScalarStructContainTest, TrivialInputTests)
{
  using col_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;

  // Haystack rows: (1, 4, "x"), (2, 5, "y"), (3, 6, "z").
  auto haystack_first  = col_wrapper{1, 2, 3};
  auto haystack_second = col_wrapper{4, 5, 6};
  auto haystack_third  = strings_col{"x", "y", "z"};
  auto const haystack  = structs_col{{haystack_first, haystack_second, haystack_third}};

  // (1, 4, "x") is the first row of the haystack.
  auto present_first  = col_wrapper{1};
  auto present_second = col_wrapper{4};
  auto present_third  = strings_col{"x"};
  auto const present  = cudf::struct_scalar(
    std::vector<cudf::column_view>{present_first, present_second, present_third});

  // (1, 4, "a") differs from the first row only in the string member.
  auto absent_first  = col_wrapper{1};
  auto absent_second = col_wrapper{4};
  auto absent_third  = strings_col{"a"};
  auto const absent  = cudf::struct_scalar(
    std::vector<cudf::column_view>{absent_first, absent_second, absent_third});

  EXPECT_EQ(true, cudf::contains(haystack, present));
  EXPECT_EQ(false, cudf::contains(haystack, absent));
}

// Null-free input viewed through a slice: a row inside the slice is expected to be found, while a
// value that only occurs in the rows outside the slice is not.
TYPED_TEST(TypedScalarStructContainTest, SlicedColumnInputTests)
{
  using col_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;

  // Filler for the rows that fall outside the slice.
  constexpr int32_t dont_care{0};

  // Six rows; only rows [2, 5) -- (1, 4, "x"), (2, 5, "y"), (3, 6, "z") -- are searched.
  auto padded_first  = col_wrapper{dont_care, dont_care, 1, 2, 3, dont_care};
  auto padded_second = col_wrapper{dont_care, dont_care, 4, 5, 6, dont_care};
  auto padded_third  = strings_col{"dont_care", "dont_care", "x", "y", "z", "dont_care"};
  auto const padded  = structs_col{{padded_first, padded_second, padded_third}};
  auto const sliced  = cudf::slice(padded, {2, 5})[0];

  // (1, 4, "x") is the first row of the slice.
  auto inside_first  = col_wrapper{1};
  auto inside_second = col_wrapper{4};
  auto inside_third  = strings_col{"x"};
  auto const inside  = cudf::struct_scalar(
    std::vector<cudf::column_view>{inside_first, inside_second, inside_third});

  // The filler row exists in the original column but not within the slice.
  auto outside_first  = col_wrapper{dont_care};
  auto outside_second = col_wrapper{dont_care};
  auto outside_third  = strings_col{"dont_care"};
  auto const outside  = cudf::struct_scalar(
    std::vector<cudf::column_view>{outside_first, outside_second, outside_third});

  EXPECT_EQ(true, cudf::contains(sliced, inside));
  EXPECT_EQ(false, cudf::contains(sliced, outside));
}

// Exercises `cudf::contains` on a structs column when nulls appear in the column's rows, in the
// column's children, or in the members of the searched scalar.
//
// `null` (the placeholder value written into null child slots) is the file-scope constant; it is
// intentionally not redeclared inside the test so that no local name shadows it.
TYPED_TEST(TypedScalarStructContainTest, SimpleInputWithNullsTests)
{
  using col_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;

  // Test with nulls at the top level.
  {
    // Rows: (1, 4, "x"), NULL, (3, 6, "z").
    auto const col = [] {
      auto child1 = col_wrapper{1, null, 3};
      auto child2 = col_wrapper{4, null, 6};
      auto child3 = strings_col{"x", "" /*NULL*/, "z"};
      return structs_col{{child1, child2, child3}, null_at(1)};
    }();

    // (1, 4, "x"): equal to the first row.
    auto const val1 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{"x"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();
    // (1, 4, "a"): equal to no row.
    auto const val2 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{"a"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();

    EXPECT_EQ(true, cudf::contains(col, val1));
    EXPECT_EQ(false, cudf::contains(col, val2));
  }

  // Test with nulls at the children level.
  {
    // Rows: (1, 4, NULL), (NULL, NULL, "y"), (3, 6, "z").
    auto const col = [] {
      auto child1 = col_wrapper{{1, null, 3}, null_at(1)};
      auto child2 = col_wrapper{{4, null, 6}, null_at(1)};
      auto child3 = strings_col{{"" /*NULL*/, "y", "z"}, null_at(0)};
      return structs_col{{child1, child2, child3}};
    }();

    // (1, 4, NULL): expected to match the first row, whose string member is null as well.
    auto const val1 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{{"" /*NULL*/}, null_at(0)};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();
    // (1, 4, ""): a valid empty string is expected not to match the null string of the first row.
    auto const val2 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{""};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();

    EXPECT_EQ(true, cudf::contains(col, val1));
    EXPECT_EQ(false, cudf::contains(col, val2));
  }

  // Test with nulls in the input scalar.
  {
    // Rows: (1, 4, "x"), (2, 5, "y"), (3, 6, "z") -- no nulls in the column.
    auto const col = [] {
      auto child1 = col_wrapper{1, 2, 3};
      auto child2 = col_wrapper{4, 5, 6};
      auto child3 = strings_col{"x", "y", "z"};
      return structs_col{{child1, child2, child3}};
    }();

    // (1, 4, "x"): equal to the first row.
    auto const val1 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{"x"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();
    // (1, 4, NULL): expected not to match any row of the null-free column.
    auto const val2 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{{"" /*NULL*/}, null_at(0)};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();

    EXPECT_EQ(true, cudf::contains(col, val1));
    EXPECT_EQ(false, cudf::contains(col, val2));
  }
}

// Exercises `cudf::contains` on a sliced structs column that has nulls either in its rows or in
// its children. Only rows [2, 5) of each six-row column are searched.
//
// `null` (the placeholder value written into null child slots) is the file-scope constant; it is
// intentionally not redeclared inside the test so that no local name shadows it.
TYPED_TEST(TypedScalarStructContainTest, SlicedInputWithNullsTests)
{
  using col_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;

  // Filler for the rows that fall outside the slice.
  constexpr int32_t dont_care{0};

  // Test with nulls at the top level.
  {
    // Sliced rows: (1, 4, "x"), NULL, (3, 6, "z").
    auto const col_original = [] {
      auto child1 = col_wrapper{dont_care, dont_care, 1, null, 3, dont_care};
      auto child2 = col_wrapper{dont_care, dont_care, 4, null, 6, dont_care};
      auto child3 = strings_col{"dont_care", "dont_care", "x", "" /*NULL*/, "z", "dont_care"};
      return structs_col{{child1, child2, child3}, null_at(3)};
    }();
    auto const col = cudf::slice(col_original, {2, 5})[0];

    // (1, 4, "x"): equal to the first row of the slice.
    auto const val1 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{"x"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();
    // (1, 4, "a"): equal to no row.
    auto const val2 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{"a"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();

    EXPECT_EQ(true, cudf::contains(col, val1));
    EXPECT_EQ(false, cudf::contains(col, val2));
  }

  // Test with nulls at the children level.
  {
    // Sliced rows: (1, 4, NULL), (NULL, NULL, "y"), (3, 6, "z").
    // Row 1 of the original column (outside the slice) is additionally a null struct.
    auto const col_original = [] {
      auto child1 =
        col_wrapper{{dont_care, dont_care /*also NULL*/, 1, null, 3, dont_care}, null_at(3)};
      auto child2 =
        col_wrapper{{dont_care, dont_care /*also NULL*/, 4, null, 6, dont_care}, null_at(3)};
      auto child3 = strings_col{
        {"dont_care", "dont_care" /*also NULL*/, "" /*NULL*/, "y", "z", "dont_care"}, null_at(2)};
      return structs_col{{child1, child2, child3}, null_at(1)};
    }();
    auto const col = cudf::slice(col_original, {2, 5})[0];

    // (1, 4, NULL): expected to match the first row of the slice.
    // NOTE(review): the null string member carries the payload "x" instead of the "" used for the
    // other null placeholders in this file; presumably the payload of a null element is ignored --
    // confirm this is intentional.
    auto const val1 = [] {
      auto child1 = col_wrapper{1};
      auto child2 = col_wrapper{4};
      auto child3 = strings_col{{"x" /*NULL*/}, null_at(0)};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();
    // The filler row exists in the original column but not within the slice.
    auto const val2 = [] {
      auto child1 = col_wrapper{dont_care};
      auto child2 = col_wrapper{dont_care};
      auto child3 = strings_col{"dont_care"};
      return cudf::struct_scalar(std::vector<cudf::column_view>{child1, child2, child3});
    }();

    EXPECT_EQ(true, cudf::contains(col, val1));
    EXPECT_EQ(false, cudf::contains(col, val2));
  }
}
Loading

0 comments on commit a433e4a

Please sign in to comment.