From f3bf0e528c447cf363613c3843357333ee11ce6a Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 12 Feb 2021 12:59:14 +0530 Subject: [PATCH] Add groupby SUM_OF_SQUARES support (#7362) closes #4667 - Add hash groupby SUM_OF_SQUARES support - considering ptxas 10.2 crash, it's enabled for floating types only in unit tests. Authors: - Karthikeyan (@karthikeyann) Approvers: - Conor Hoekstra (@codereport) - Keith Kraus (@kkraus14) - Jake Hemstad (@jrhemstad) URL: https://github.com/rapidsai/cudf/pull/7362 --- .../cudf/detail/aggregation/aggregation.cuh | 37 +++++ cpp/src/groupby/hash/groupby.cu | 6 +- cpp/tests/CMakeLists.txt | 1 + .../groupby/group_sum_of_squares_test.cpp | 149 ++++++++++++++++++ 4 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 cpp/tests/groupby/group_sum_of_squares_test.cpp diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 3ed887e1269..3d006449044 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -314,6 +314,43 @@ struct update_target_element +constexpr bool is_SOS_supported() +{ + return std::is_floating_point::value; +} +#else +template +constexpr bool is_SOS_supported() +{ + return is_numeric(); +} +#endif + +template +struct update_target_element()>> { + __device__ void operator()(mutable_column_device_view target, + size_type target_index, + column_device_view source, + size_type source_index) const noexcept + { + if (source_has_nulls and source.is_null(source_index)) { return; } + + using Target = target_type_t; + auto value = static_cast(source.element(source_index)); + atomicAdd(&target.element(target_index), value * value); + if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + template struct update_target_element< Source, diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index e22d355f8a8..c54ecee9ccb 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,6 +66,7 @@ constexpr std::array hash_aggregations{ aggregation::SUM, aggregation::MIN, aggregation::MAX, aggregation::COUNT_VALID, aggregation::COUNT_ALL, aggregation::ARGMIN, aggregation::ARGMAX, + aggregation::SUM_OF_SQUARES, aggregation::MEAN, aggregation::STD, aggregation::VARIANCE}; //Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, @@ -97,7 +98,8 @@ bool constexpr is_hash_aggregation(aggregation::Kind t) // return array_contains(hash_aggregations, t); return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or (t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or - (t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or (t == aggregation::MEAN) or + (t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or + (t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or (t == aggregation::STD) or (t == aggregation::VARIANCE); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 55b1d50767f..c03a738ae8a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -153,6 +153,7 @@ set(GROUPBY_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_min_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_max_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_of_squares_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_mean_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_var_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_std_test.cpp" diff --git a/cpp/tests/groupby/group_sum_of_squares_test.cpp b/cpp/tests/groupby/group_sum_of_squares_test.cpp new file mode 100644 index 00000000000..24601d2b246 --- /dev/null +++ b/cpp/tests/groupby/group_sum_of_squares_test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace test { +template +struct groupby_sum_of_squares_test : public cudf::test::BaseFixture { +}; + +// These tests will not work for all types until the following ptxas bug is fixed in 10.2 +// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp= +// using supported_types = cudf::test::Types; +using supported_types = cudf::test::Types; + +TYPED_TEST_CASE(groupby_sum_of_squares_test, supported_types); + +// clang-format off +TYPED_TEST(groupby_sum_of_squares_test, basic) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + fixed_width_column_wrapper vals { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + // { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3} + fixed_width_column_wrapper expect_keys { 1, 2, 3 }; + // { 0, 3, 6, 1, 4, 5, 9, 2, 7, 8} + fixed_width_column_wrapper expect_vals({ 45., 123., 117. }, all_valid()); + + auto agg = cudf::make_sum_of_squares_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TYPED_TEST(groupby_sum_of_squares_test, empty_cols) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys { }; + fixed_width_column_wrapper vals { }; + + fixed_width_column_wrapper expect_keys { }; + fixed_width_column_wrapper expect_vals { }; + + auto agg = cudf::make_sum_of_squares_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TYPED_TEST(groupby_sum_of_squares_test, zero_valid_keys) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys ( { 1, 2, 3}, all_null() ); + fixed_width_column_wrapper vals { 3, 4, 5}; + + fixed_width_column_wrapper expect_keys { }; + fixed_width_column_wrapper expect_vals { }; + + auto agg = cudf::make_sum_of_squares_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TYPED_TEST(groupby_sum_of_squares_test, zero_valid_values) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys { 1, 1, 1}; + fixed_width_column_wrapper vals ( { 3, 4, 5}, all_null() ); + + fixed_width_column_wrapper expect_keys { 1 }; + fixed_width_column_wrapper expect_vals({ 0 }, all_null()); + + auto agg = cudf::make_sum_of_squares_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TYPED_TEST(groupby_sum_of_squares_test, null_keys_and_values) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys( { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, + { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1}); + fixed_width_column_wrapper vals( { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3}, + { 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}); + + // { 1, 1, 2, 2, 2, 3, 3, 4} + fixed_width_column_wrapper expect_keys({ 1, 2, 3, 4}, all_valid()); + // { 3, 6, 1, 4, 9, 2, 8, 3} + fixed_width_column_wrapper expect_vals({ 45., 98., 68., 9.}, + { 1, 1, 1, 0}); + + auto agg = cudf::make_sum_of_squares_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} +// clang-format on + +// This test will not work until the following ptxas bug is fixed in 10.2 +// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp= +TYPED_TEST(groupby_sum_of_squares_test, DISABLED_dictionary) +{ + using K = int32_t; + using V = TypeParam; + using R = cudf::detail::target_type_t; + + // clang-format off + fixed_width_column_wrapper keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + dictionary_column_wrapper vals{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + // { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3} + fixed_width_column_wrapper expect_keys({ 1, 2, 3 }); + // { 0, 3, 6, 1, 4, 5, 9, 2, 7, 8} + fixed_width_column_wrapper expect_vals( { 45., 123., 117. }, all_valid()); + // clang-format on + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation()); +} + +} // namespace test +} // namespace cudf