diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 533f193d692..d03238e4752 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -55,6 +55,9 @@ groupby::groupby(table_view const& keys, _column_order{column_order}, _null_precedence{null_precedence} { + auto is_list = [](auto const& col) { return col.type().id() == cudf::type_id::LIST; }; + auto has_list = std::any_of(keys.begin(), keys.end(), is_list); + CUDF_EXPECTS(not has_list, "Groupby does not support LIST columns as keys."); } // Select hash vs. sort groupby implementation diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9553d463ab..03f7967cee0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -62,6 +62,7 @@ ConfigureTest(GROUPBY_TEST groupby/count_tests.cpp groupby/groups_tests.cpp groupby/keys_tests.cpp + groupby/lists_tests.cpp groupby/m2_tests.cpp groupby/min_tests.cpp groupby/max_scan_tests.cpp diff --git a/cpp/tests/groupby/lists_tests.cpp b/cpp/tests/groupby/lists_tests.cpp new file mode 100644 index 00000000000..11b8ffa92b9 --- /dev/null +++ b/cpp/tests/groupby/lists_tests.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +namespace cudf { +namespace test { + +template +struct groupby_lists_test : public cudf::test::BaseFixture { +}; + +TYPED_TEST_SUITE(groupby_lists_test, cudf::test::FixedWidthTypes); + +namespace { +// Checking with a single aggregation, and aggregation column. +// This test is orthogonal to the aggregation type; it focuses on testing the grouping +// with LISTS keys. +auto sum_agg() { return cudf::make_sum_aggregation(); } + +void test_sort_based_sum_agg(column_view const& keys, column_view const& values) +{ + test_single_agg( + keys, values, keys, values, sum_agg(), force_use_sort_impl::YES, null_policy::INCLUDE); +} + +void test_hash_based_sum_agg(column_view const& keys, column_view const& values) +{ + test_single_agg( + keys, values, keys, values, sum_agg(), force_use_sort_impl::NO, null_policy::INCLUDE); +} + +} // namespace + +TYPED_TEST(groupby_lists_test, top_level_lists_are_unsupported) +{ + // Test that grouping on LISTS columns fails visibly. + + // clang-format off + auto keys = lists_column_wrapper { {1,1}, {2,2}, {3,3}, {1,1}, {2,2} }; + auto values = fixed_width_column_wrapper { 0, 1, 2, 3, 4 }; + // clang-format on + + EXPECT_THROW(test_sort_based_sum_agg(keys, values), cudf::logic_error); + EXPECT_THROW(test_hash_based_sum_agg(keys, values), cudf::logic_error); +} + +} // namespace test +} // namespace cudf diff --git a/cpp/tests/groupby/structs_tests.cpp b/cpp/tests/groupby/structs_tests.cpp index 00126a4a5a0..3715ba8d17b 100644 --- a/cpp/tests/groupby/structs_tests.cpp +++ b/cpp/tests/groupby/structs_tests.cpp @@ -22,8 +22,6 @@ #include #include -#include "cudf/aggregation.hpp" -#include "cudf/types.hpp" using namespace cudf::test::iterators; @@ -34,7 +32,7 @@ template struct groupby_structs_test : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(groupby_structs_test, cudf::test::FixedWidthTypes); +TYPED_TEST_SUITE(groupby_structs_test, cudf::test::FixedWidthTypes); using V = int32_t; // Type of Aggregation Column. using M0 = int32_t; // Type of STRUCT's first (i.e. 0th) member. @@ -79,27 +77,43 @@ void print_agg_results(column_view const& keys, column_view const& vals) } } -void test_sum_agg(column_view const& keys, - column_view const& values, - column_view const& expected_keys, - column_view const& expected_values) +void test_sort_based_sum_agg(column_view const& keys, + column_view const& values, + column_view const& expected_keys, + column_view const& expected_values) { test_single_agg(keys, values, expected_keys, expected_values, sum_agg(), - force_use_sort_impl::NO, + force_use_sort_impl::YES, null_policy::INCLUDE); +} + +void test_hash_based_sum_agg(column_view const& keys, + column_view const& values, + column_view const& expected_keys, + column_view const& expected_values) +{ test_single_agg(keys, values, expected_keys, expected_values, sum_agg(), - force_use_sort_impl::YES, + force_use_sort_impl::NO, null_policy::INCLUDE); } +void test_sum_agg(column_view const& keys, + column_view const& values, + column_view const& expected_keys, + column_view const& expected_values) +{ + test_sort_based_sum_agg(keys, values, expected_keys, expected_values); + test_hash_based_sum_agg(keys, values, expected_keys, expected_values); +} + } // namespace TYPED_TEST(groupby_structs_test, basic) @@ -312,7 +326,8 @@ TYPED_TEST(groupby_structs_test, lists_are_unsupported) // clang-format on auto keys = structs{{member_0, member_1}}; - EXPECT_THROW(test_sum_agg(keys, values, keys, values), cudf::logic_error); + EXPECT_THROW(test_sort_based_sum_agg(keys, values, keys, values), cudf::logic_error); + EXPECT_THROW(test_hash_based_sum_agg(keys, values, keys, values), cudf::logic_error); } } // namespace test