diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu
index 241b3c595f1..81ed3cfbd51 100644
--- a/cpp/src/search/search.cu
+++ b/cpp/src/search/search.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -173,11 +173,56 @@ bool contains_scalar_dispatch::operator()(column_view const&,
 }
 
 template <>
-bool contains_scalar_dispatch::operator()(column_view const&,
-                                          scalar const&,
-                                          rmm::cuda_stream_view)
+bool contains_scalar_dispatch::operator()(column_view const& col,
+                                          scalar const& value,
+                                          rmm::cuda_stream_view stream)
 {
-  CUDF_FAIL("struct_view type not supported yet");
+  CUDF_EXPECTS(col.type() == value.type(), "scalar and column types must match");
+
+  auto const scalar_table = static_cast(&value)->view();
+  CUDF_EXPECTS(col.num_children() == scalar_table.num_columns(),
+               "struct scalar and structs column must have the same number of children");
+  for (size_type i = 0; i < col.num_children(); ++i) {
+    CUDF_EXPECTS(col.child(i).type() == scalar_table.column(i).type(),
+                 "scalar and column children types must match");
+  }
+
+  // Prepare to flatten the structs column and scalar.
+  auto const has_null_elements =
+    has_nested_nulls(table_view{std::vector{col.child_begin(), col.child_end()}}) ||
+    has_nested_nulls(scalar_table);
+  auto const flatten_nullability = has_null_elements
+                                     ? structs::detail::column_nullability::FORCE
+                                     : structs::detail::column_nullability::MATCH_INCOMING;
+
+  // Flatten the input structs column, only materialize the bitmask if there is null in the input.
+  auto const col_flattened =
+    structs::detail::flatten_nested_columns(table_view{{col}}, {}, {}, flatten_nullability);
+  auto const val_flattened =
+    structs::detail::flatten_nested_columns(scalar_table, {}, {}, flatten_nullability);
+
+  // The struct scalar only contains the struct member columns.
+  // Thus, if there is any null in the input, we must exclude the first column in the flattened
+  // table of the input column from searching because that column is the materialized bitmask of
+  // the input structs column.
+  auto const col_flattened_content = col_flattened.flattened_columns();
+  auto const col_flattened_children = table_view{std::vector{
+    col_flattened_content.begin() + static_cast(has_null_elements),
+    col_flattened_content.end()}};
+
+  auto const d_col_children_ptr = table_device_view::create(col_flattened_children, stream);
+  auto const d_val_ptr = table_device_view::create(val_flattened, stream);
+
+  auto const start_iter = thrust::make_counting_iterator(0);
+  auto const end_iter = start_iter + col.size();
+  auto const comp = row_equality_comparator(
+    nullate::DYNAMIC{has_null_elements}, *d_col_children_ptr, *d_val_ptr, null_equality::EQUAL);
+  auto const found_iter = thrust::find_if(
+    rmm::exec_policy(stream), start_iter, end_iter, [comp] __device__(auto const idx) {
+      return comp(idx, 0); // compare col[idx] == val[0].
+    });
+
+  return found_iter != end_iter;
 }
 
 template <>
@@ -203,7 +248,6 @@ namespace detail {
 bool contains(column_view const& col, scalar const& value, rmm::cuda_stream_view stream)
 {
   if (col.is_empty()) { return false; }
-
   if (not value.is_valid(stream)) { return col.has_nulls(); }
 
   return cudf::type_dispatcher(col.type(), contains_scalar_dispatch{}, col, value, stream);
@@ -264,20 +308,14 @@ struct multi_contains_dispatch {
 
 template <>
 std::unique_ptr multi_contains_dispatch::operator()(
-  column_view const& haystack,
-  column_view const& needles,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+  column_view const&, column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
 {
   CUDF_FAIL("list_view type not supported");
 }
 
 template <>
 std::unique_ptr multi_contains_dispatch::operator()(
-  column_view const& haystack,
-  column_view const& needles,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+  column_view const&, column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
 {
   CUDF_FAIL("struct_view type not supported");
 }
diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp
index db2ecb89d6a..a1f0b1d81cf 100644
--- a/cpp/tests/search/search_struct_test.cpp
+++ b/cpp/tests/search/search_struct_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 #include
 
 #include
+#include
 #include
 #include
 
@@ -35,15 +36,14 @@ constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_leve
 constexpr int32_t null{0}; // Mark for null child elements
 constexpr int32_t XXX{0}; // Mark for null struct elements
 
-template
-struct TypedStructSearchTest : public cudf::test::BaseFixture {
-};
-
 using TestTypes = cudf::test::Concat;
 
+template
+struct TypedStructSearchTest : public cudf::test::BaseFixture {
+};
 TYPED_TEST_SUITE(TypedStructSearchTest, TestTypes);
 
 namespace {
@@ -353,3 +353,234 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), verbosity);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), verbosity);
 }
+
+template
+struct TypedScalarStructContainTest : public cudf::test::BaseFixture {
+};
+TYPED_TEST_SUITE(TypedScalarStructContainTest, TestTypes);
+
+TYPED_TEST(TypedScalarStructContainTest, EmptyInputTest)
+{
+  using col_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  auto const col = [] {
+    auto child = col_wrapper{};
+    return structs_col{{child}};
+  }();
+
+  auto const val = [] {
+    auto child = col_wrapper{1};
+    return cudf::struct_scalar(std::vector{child});
+  }();
+
+  EXPECT_EQ(false, cudf::contains(col, val));
+}
+
+TYPED_TEST(TypedScalarStructContainTest, TrivialInputTests)
+{
+  using col_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  auto const col = [] {
+    auto child1 = col_wrapper{1, 2, 3};
+    auto child2 = col_wrapper{4, 5, 6};
+    auto child3 = strings_col{"x", "y", "z"};
+    return structs_col{{child1, child2, child3}};
+  }();
+
+  auto const val1 = [] {
+    auto child1 = col_wrapper{1};
+    auto child2 = col_wrapper{4};
+    auto child3 = strings_col{"x"};
+    return cudf::struct_scalar(std::vector{child1, child2, child3});
+  }();
+  auto const val2 = [] {
+    auto child1 = col_wrapper{1};
+    auto child2 = col_wrapper{4};
+    auto child3 = strings_col{"a"};
+    return cudf::struct_scalar(std::vector{child1, child2, child3});
+  }();
+
+  EXPECT_EQ(true, cudf::contains(col, val1));
+  EXPECT_EQ(false, cudf::contains(col, val2));
+}
+
+TYPED_TEST(TypedScalarStructContainTest, SlicedColumnInputTests)
+{
+  using col_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  constexpr int32_t dont_care{0};
+
+  auto const col_original = [] {
+    auto child1 = col_wrapper{dont_care, dont_care, 1, 2, 3, dont_care};
+    auto child2 = col_wrapper{dont_care, dont_care, 4, 5, 6, dont_care};
+    auto child3 = strings_col{"dont_care", "dont_care", "x", "y", "z", "dont_care"};
+    return structs_col{{child1, child2, child3}};
+  }();
+  auto const col = cudf::slice(col_original, {2, 5})[0];
+
+  auto const val1 = [] {
+    auto child1 = col_wrapper{1};
+    auto child2 = col_wrapper{4};
+    auto child3 = strings_col{"x"};
+    return cudf::struct_scalar(std::vector{child1, child2, child3});
+  }();
+  auto const val2 = [] {
+    auto child1 = col_wrapper{dont_care};
+    auto child2 = col_wrapper{dont_care};
+    auto child3 = strings_col{"dont_care"};
+    return cudf::struct_scalar(std::vector{child1, child2, child3});
+  }();
+
+  EXPECT_EQ(true, cudf::contains(col, val1));
+  EXPECT_EQ(false, cudf::contains(col, val2));
+}
+
+TYPED_TEST(TypedScalarStructContainTest, SimpleInputWithNullsTests)
+{
+  using col_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  constexpr int32_t null{0};
+
+  // Test with nulls at the top level.
+  {
+    auto const col = [] {
+      auto child1 = col_wrapper{1, null, 3};
+      auto child2 = col_wrapper{4, null, 6};
+      auto child3 = strings_col{"x", "" /*NULL*/, "z"};
+      return structs_col{{child1, child2, child3}, null_at(1)};
+    }();
+
+    auto const val1 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{"x"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+    auto const val2 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{"a"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+
+    EXPECT_EQ(true, cudf::contains(col, val1));
+    EXPECT_EQ(false, cudf::contains(col, val2));
+  }
+
+  // Test with nulls at the children level.
+  {
+    auto const col = [] {
+      auto child1 = col_wrapper{{1, null, 3}, null_at(1)};
+      auto child2 = col_wrapper{{4, null, 6}, null_at(1)};
+      auto child3 = strings_col{{"" /*NULL*/, "y", "z"}, null_at(0)};
+      return structs_col{{child1, child2, child3}};
+    }();
+
+    auto const val1 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{{"" /*NULL*/}, null_at(0)};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+    auto const val2 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{""};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+
+    EXPECT_EQ(true, cudf::contains(col, val1));
+    EXPECT_EQ(false, cudf::contains(col, val2));
+  }
+
+  // Test with nulls in the input scalar.
+  {
+    auto const col = [] {
+      auto child1 = col_wrapper{1, 2, 3};
+      auto child2 = col_wrapper{4, 5, 6};
+      auto child3 = strings_col{"x", "y", "z"};
+      return structs_col{{child1, child2, child3}};
+    }();
+
+    auto const val1 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{"x"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+    auto const val2 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{{"" /*NULL*/}, null_at(0)};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+
+    EXPECT_EQ(true, cudf::contains(col, val1));
+    EXPECT_EQ(false, cudf::contains(col, val2));
+  }
+}
+
+TYPED_TEST(TypedScalarStructContainTest, SlicedInputWithNullsTests)
+{
+  using col_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  constexpr int32_t dont_care{0};
+  constexpr int32_t null{0};
+
+  // Test with nulls at the top level.
+  {
+    auto const col_original = [] {
+      auto child1 = col_wrapper{dont_care, dont_care, 1, null, 3, dont_care};
+      auto child2 = col_wrapper{dont_care, dont_care, 4, null, 6, dont_care};
+      auto child3 = strings_col{"dont_care", "dont_care", "x", "" /*NULL*/, "z", "dont_care"};
+      return structs_col{{child1, child2, child3}, null_at(3)};
+    }();
+    auto const col = cudf::slice(col_original, {2, 5})[0];
+
+    auto const val1 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{"x"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+    auto const val2 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{"a"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+
+    EXPECT_EQ(true, cudf::contains(col, val1));
+    EXPECT_EQ(false, cudf::contains(col, val2));
+  }
+
+  // Test with nulls at the children level.
+  {
+    auto const col_original = [] {
+      auto child1 =
+        col_wrapper{{dont_care, dont_care /*also NULL*/, 1, null, 3, dont_care}, null_at(3)};
+      auto child2 =
+        col_wrapper{{dont_care, dont_care /*also NULL*/, 4, null, 6, dont_care}, null_at(3)};
+      auto child3 = strings_col{
+        {"dont_care", "dont_care" /*also NULL*/, "" /*NULL*/, "y", "z", "dont_care"}, null_at(2)};
+      return structs_col{{child1, child2, child3}, null_at(1)};
+    }();
+    auto const col = cudf::slice(col_original, {2, 5})[0];
+
+    auto const val1 = [] {
+      auto child1 = col_wrapper{1};
+      auto child2 = col_wrapper{4};
+      auto child3 = strings_col{{"x"}, null_at(0)};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+    auto const val2 = [] {
+      auto child1 = col_wrapper{dont_care};
+      auto child2 = col_wrapper{dont_care};
+      auto child3 = strings_col{"dont_care"};
+      return cudf::struct_scalar(std::vector{child1, child2, child3});
+    }();
+
+    EXPECT_EQ(true, cudf::contains(col, val1));
+    EXPECT_EQ(false, cudf::contains(col, val2));
+  }
+}