Skip to content

Commit

Permalink
Add groupby SUM_OF_SQUARES support (#7362)
Browse files Browse the repository at this point in the history
closes #4667
- Add hash groupby SUM_OF_SQUARES support
- Because of a ptxas 10.2 crash, it is enabled only for floating-point types when building with CUDA 10.2, and the unit tests cover only floating-point types.

Authors:
  - Karthikeyan (@karthikeyann)

Approvers:
  - Conor Hoekstra (@codereport)
  - Keith Kraus (@kkraus14)
  - Jake Hemstad (@jrhemstad)

URL: #7362
  • Loading branch information
karthikeyann authored Feb 12, 2021
1 parent ebe307e commit f3bf0e5
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 2 deletions.
37 changes: 37 additions & 0 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,43 @@ struct update_target_element<dictionary32, aggregation::SUM, target_has_nulls, s
}
};

// Compile-time predicate selecting the source types for which the
// SUM_OF_SQUARES `update_target_element` specialization is enabled.
//
// Instantiating that specialization for every numeric type makes nvcc/ptxas
// segfault, on CUDA 10.2 only:
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317
// Restricting it to the floating-point types does not trigger the crash and is
// what the unit tests rely on, so that restriction applies to 10.2 builds only.
template <typename T>
constexpr bool is_SOS_supported()
{
#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2)
  // CUDA 10.2: floating-point sources only (ptxas workaround).
  return std::is_floating_point<T>::value;
#else
  // Any other toolkit: all numeric sources.
  return is_numeric<T>();
#endif
}

/**
 * @brief `update_target_element` specialization for `aggregation::SUM_OF_SQUARES`.
 *
 * Participates only when `is_SOS_supported<Source>()` is true. The call operator
 * squares one source element and adds it, via `atomicAdd`, to one target element.
 *
 * @tparam Source Element type read from the source column
 * @tparam target_has_nulls Whether the target's null state must be checked/updated
 * @tparam source_has_nulls Whether the source element must be null-checked
 */
template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
                             aggregation::SUM_OF_SQUARES,
                             target_has_nulls,
                             source_has_nulls,
                             std::enable_if_t<is_SOS_supported<Source>()>> {
  /**
   * @param target Column receiving the accumulated sum of squares
   * @param target_index Row of `target` to update
   * @param source Column providing the input element
   * @param source_index Row of `source` to read
   */
  __device__ void operator()(mutable_column_device_view target,
                             size_type target_index,
                             column_device_view source,
                             size_type source_index) const noexcept
  {
    // A null source row contributes nothing and leaves the target untouched.
    if (source_has_nulls and source.is_null(source_index)) { return; }

    using Target = target_type_t<Source, aggregation::SUM_OF_SQUARES>;

    // Convert to the accumulator type first so the square is computed on Target values.
    auto const element = static_cast<Target>(source.element<Source>(source_index));
    auto const squared = element * element;
    atomicAdd(&target.element<Target>(target_index), squared);

    // The target row now holds a contribution, so it must be flagged valid.
    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
  }
};

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<
Source,
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -66,6 +66,7 @@ constexpr std::array<aggregation::Kind, 10> hash_aggregations{
aggregation::SUM, aggregation::MIN, aggregation::MAX,
aggregation::COUNT_VALID, aggregation::COUNT_ALL,
aggregation::ARGMIN, aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN, aggregation::STD, aggregation::VARIANCE};

//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
Expand Down Expand Up @@ -97,7 +98,8 @@ bool constexpr is_hash_aggregation(aggregation::Kind t)
// return array_contains(hash_aggregations, t);
return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or
(t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or
(t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or (t == aggregation::MEAN) or
(t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or
(t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or
(t == aggregation::STD) or (t == aggregation::VARIANCE);
}

Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ set(GROUPBY_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_min_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_max_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_of_squares_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_mean_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_var_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_std_test.cpp"
Expand Down
149 changes: 149 additions & 0 deletions cpp/tests/groupby/group_sum_of_squares_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <tests/groupby/groupby_test_util.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/detail/aggregation/aggregation.hpp>

namespace cudf {
namespace test {
// Typed fixture for the groupby SUM_OF_SQUARES tests; `V` is the type parameter
// each test uses as the element type of its values column.
template <typename V>
struct groupby_sum_of_squares_test : cudf::test::BaseFixture {
};

// Only floating-point value types are exercised for now. The wider list below
// cannot be used until this ptxas bug is fixed in 10.2:
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp=
// using supported_types = cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
using supported_types = cudf::test::Types<float, double>;

// Instantiate every TYPED_TEST of the fixture once per type in `supported_types`.
TYPED_TEST_CASE(groupby_sum_of_squares_test, supported_types);

// clang-format off
// All keys and values valid: each key's result is the sum of its squared values.
TYPED_TEST(groupby_sum_of_squares_test, basic)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<key_t>   input_keys { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
  fixed_width_column_wrapper<value_t> input_vals { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // Values seen by each key:
  //   key 1 -> { 0, 3, 6 }    =>  0 +  9 + 36      =  45
  //   key 2 -> { 1, 4, 5, 9 } =>  1 + 16 + 25 + 81 = 123
  //   key 3 -> { 2, 7, 8 }    =>  4 + 49 + 64      = 117
  fixed_width_column_wrapper<key_t>    expected_keys { 1, 2, 3 };
  fixed_width_column_wrapper<result_t> expected_vals({ 45., 123., 117. }, all_valid());

  test_single_agg(input_keys, input_vals, expected_keys, expected_vals,
                  cudf::make_sum_of_squares_aggregation());
}

// Empty key and value columns must produce empty key and result columns.
TYPED_TEST(groupby_sum_of_squares_test, empty_cols)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<key_t>   input_keys {};
  fixed_width_column_wrapper<value_t> input_vals {};

  fixed_width_column_wrapper<key_t>    expected_keys {};
  fixed_width_column_wrapper<result_t> expected_vals {};

  test_single_agg(input_keys, input_vals, expected_keys, expected_vals,
                  cudf::make_sum_of_squares_aggregation());
}

// Every key is null, so no group is expected in the output.
TYPED_TEST(groupby_sum_of_squares_test, zero_valid_keys)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<key_t>   input_keys ( { 1, 2, 3}, all_null() );
  fixed_width_column_wrapper<value_t> input_vals { 3, 4, 5};

  fixed_width_column_wrapper<key_t>    expected_keys {};
  fixed_width_column_wrapper<result_t> expected_vals {};

  test_single_agg(input_keys, input_vals, expected_keys, expected_vals,
                  cudf::make_sum_of_squares_aggregation());
}

// A single group whose values are all null: the group is kept, its result is null.
TYPED_TEST(groupby_sum_of_squares_test, zero_valid_values)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<key_t>   input_keys { 1, 1, 1};
  fixed_width_column_wrapper<value_t> input_vals ( { 3, 4, 5}, all_null() );

  fixed_width_column_wrapper<key_t>    expected_keys { 1 };
  fixed_width_column_wrapper<result_t> expected_vals({ 0 }, all_null());

  test_single_agg(input_keys, input_vals, expected_keys, expected_vals,
                  cudf::make_sum_of_squares_aggregation());
}

// Mix of null keys and null values: rows with a null key are dropped, null
// values are ignored, and a group with no valid value yields a null result.
TYPED_TEST(groupby_sum_of_squares_test, null_keys_and_values)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<key_t>   input_keys( { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
                                                  { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
  fixed_width_column_wrapper<value_t> input_vals( { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3},
                                                  { 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});

  // Valid values seen by each key:
  //   key 1 -> { 3, 6 }    =>  9 + 36      = 45
  //   key 2 -> { 1, 4, 9 } =>  1 + 16 + 81 = 98
  //   key 3 -> { 2, 8 }    =>  4 + 64      = 68
  //   key 4 -> { }         =>  null (its only value is null)
  fixed_width_column_wrapper<key_t>    expected_keys({ 1, 2, 3, 4}, all_valid());
  fixed_width_column_wrapper<result_t> expected_vals({ 45., 98., 68., 9.},
                                                     { 1, 1, 1, 0});

  test_single_agg(input_keys, input_vals, expected_keys, expected_vals,
                  cudf::make_sum_of_squares_aggregation());
}
// clang-format on

// This test will not work until the following ptxas bug is fixed in 10.2
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp=
// Same data as the `basic` case, but the values come from a dictionary column.
// Disabled through the DISABLED_ name prefix.
TYPED_TEST(groupby_sum_of_squares_test, DISABLED_dictionary)
{
  using key_t    = int32_t;
  using value_t  = TypeParam;
  using result_t = cudf::detail::target_type_t<value_t, aggregation::SUM_OF_SQUARES>;

  // clang-format off
  fixed_width_column_wrapper<key_t>  input_keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
  dictionary_column_wrapper<value_t> input_vals{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // Values seen by each key:
  //   key 1 -> { 0, 3, 6 }    =>  45
  //   key 2 -> { 1, 4, 5, 9 } => 123
  //   key 3 -> { 2, 7, 8 }    => 117
  fixed_width_column_wrapper<key_t>    expected_keys({ 1, 2, 3 });
  fixed_width_column_wrapper<result_t> expected_vals( { 45., 123., 117. }, all_valid());
  // clang-format on

  auto sum_of_squares = cudf::make_sum_of_squares_aggregation();
  test_single_agg(input_keys, input_vals, expected_keys, expected_vals, std::move(sum_of_squares));
}

} // namespace test
} // namespace cudf

0 comments on commit f3bf0e5

Please sign in to comment.