Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add groupby SUM_OF_SQUARES support #7362

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,32 @@ struct update_target_element<dictionary32, aggregation::SUM, target_has_nulls, s
}
};

/**
 * @brief Specialization of `update_target_element` for `aggregation::SUM_OF_SQUARES`
 * on numeric source types.
 *
 * Converts the source element to the aggregation's target type, squares it, and
 * atomically adds the square to the target element.
 *
 * @tparam Source           Element type of the source column; this specialization is
 *                          enabled only when `is_numeric<Source>()` is true.
 * @tparam target_has_nulls When true, a null target element is marked valid after the update.
 * @tparam source_has_nulls When true, null source elements are skipped.
 */
template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
                             aggregation::SUM_OF_SQUARES,
                             target_has_nulls,
                             source_has_nulls,
                             std::enable_if_t<is_numeric<Source>()>> {
  /**
   * @brief Accumulates the square of `source[source_index]` into `target[target_index]`.
   *
   * @param target       Column holding the running per-group sums.
   * @param target_index Index of the element in `target` to update.
   * @param source       Column providing the values to square.
   * @param source_index Index of the element in `source` to read.
   */
  __device__ void operator()(mutable_column_device_view target,
                             size_type target_index,
                             column_device_view source,
                             size_type source_index) const noexcept
  {
    // This code will segfault in nvcc/ptxas 10.2 only
    // https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317
    // Enabling only for 2 types does not segfault. Using for unit tests.
    // Consequence: when built with CUDA 10.2, non-floating-point sources return here
    // without touching the target.
#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2)
    if (not std::is_floating_point<Source>::value) return;
#endif
    // A null source element contributes nothing to the sum.
    if (source_has_nulls and source.is_null(source_index)) { return; }

    // Convert before multiplying so the square is computed in the target type.
    using Target = target_type_t<Source, aggregation::SUM_OF_SQUARES>;
    auto value   = static_cast<Target>(source.element<Source>(source_index));
    atomicAdd(&target.element<Target>(target_index), value * value);
    // The target element has now received a value, so it must no longer be null.
    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
  }
};

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<
Source,
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -66,6 +66,7 @@ constexpr std::array<aggregation::Kind, 10> hash_aggregations{
aggregation::SUM, aggregation::MIN, aggregation::MAX,
aggregation::COUNT_VALID, aggregation::COUNT_ALL,
aggregation::ARGMIN, aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN, aggregation::STD, aggregation::VARIANCE};

//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
Expand Down Expand Up @@ -97,7 +98,8 @@ bool constexpr is_hash_aggregation(aggregation::Kind t)
// return array_contains(hash_aggregations, t);
return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or
(t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or
(t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or (t == aggregation::MEAN) or
(t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or
(t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or
(t == aggregation::STD) or (t == aggregation::VARIANCE);
}

Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ set(GROUPBY_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_min_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_max_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_of_squares_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_mean_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_var_test.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_std_test.cpp"
Expand Down
149 changes: 149 additions & 0 deletions cpp/tests/groupby/group_sum_of_squares_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <tests/groupby/groupby_test_util.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/detail/aggregation/aggregation.hpp>

namespace cudf {
namespace test {
// Typed fixture shared by the groupby SUM_OF_SQUARES tests below; each test reads the
// value type under test through `TypeParam`.
template <typename ValueType>
struct groupby_sum_of_squares_test : cudf::test::BaseFixture {
};

// Value types the typed tests are instantiated with.
//
// Only floating-point types are enabled for now: these tests will not work for all types
// until the following ptxas bug is fixed in CUDA 10.2
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp=
//
// Intended list once the bug is fixed:
// using supported_types = cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
using supported_types = cudf::test::Types<float, double>;

// Instantiate every TYPED_TEST of the fixture once per type in `supported_types`.
TYPED_TEST_CASE(groupby_sum_of_squares_test, supported_types);

// clang-format off
// Fully valid keys and values: each key's result is the sum of its squared values.
TYPED_TEST(groupby_sum_of_squares_test, basic)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<KeyT>   keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
  fixed_width_column_wrapper<ValueT> vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // key 1 -> values {0, 3, 6}    : 0 + 9 + 36       = 45
  // key 2 -> values {1, 4, 5, 9} : 1 + 16 + 25 + 81 = 123
  // key 3 -> values {2, 7, 8}    : 4 + 49 + 64      = 117
  fixed_width_column_wrapper<KeyT>    expect_keys{1, 2, 3};
  fixed_width_column_wrapper<ResultT> expect_vals({45., 123., 117.}, all_valid());

  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation());
}

// Empty key and value columns: the expected keys and results are empty as well.
TYPED_TEST(groupby_sum_of_squares_test, empty_cols)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<KeyT>   keys{};
  fixed_width_column_wrapper<ValueT> vals{};

  fixed_width_column_wrapper<KeyT>    expect_keys{};
  fixed_width_column_wrapper<ResultT> expect_vals{};

  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation());
}

// Every key is null: the expected output contains no groups at all.
TYPED_TEST(groupby_sum_of_squares_test, zero_valid_keys)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<KeyT>   keys({1, 2, 3}, all_null());
  fixed_width_column_wrapper<ValueT> vals{3, 4, 5};

  fixed_width_column_wrapper<KeyT>    expect_keys{};
  fixed_width_column_wrapper<ResultT> expect_vals{};

  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation());
}

// One valid key whose values are all null: a single group with a null result is expected.
TYPED_TEST(groupby_sum_of_squares_test, zero_valid_values)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<KeyT>   keys{1, 1, 1};
  fixed_width_column_wrapper<ValueT> vals({3, 4, 5}, all_null());

  fixed_width_column_wrapper<KeyT>    expect_keys{1};
  fixed_width_column_wrapper<ResultT> expect_vals({0}, all_null());

  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation());
}

// Mix of null keys and null values: rows with a null key are absent from the expected
// groups, and null values add nothing to their group's sum.
TYPED_TEST(groupby_sum_of_squares_test, null_keys_and_values)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  fixed_width_column_wrapper<KeyT> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
                                        {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
  fixed_width_column_wrapper<ValueT> vals({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3},
                                          {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});

  // key 1 -> valid values {3, 6}    : 9 + 36      = 45
  // key 2 -> valid values {1, 4, 9} : 1 + 16 + 81 = 98
  // key 3 -> valid values {2, 8}    : 4 + 64      = 68
  // key 4 -> only value (3) is null : result is null
  fixed_width_column_wrapper<KeyT>    expect_keys({1, 2, 3, 4}, all_valid());
  fixed_width_column_wrapper<ResultT> expect_vals({45., 98., 68., 9.},
                                                  {1, 1, 1, 0});

  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation());
}
// clang-format on

// Same data as the `basic` test, but with the values held in a dictionary column.
// Disabled: this test will not work until the following ptxas bug is fixed in 10.2
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp=
TYPED_TEST(groupby_sum_of_squares_test, DISABLED_dictionary)
{
  using KeyT    = int32_t;
  using ValueT  = TypeParam;
  using ResultT = cudf::detail::target_type_t<ValueT, aggregation::SUM_OF_SQUARES>;

  // clang-format off
  fixed_width_column_wrapper<KeyT>  keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
  dictionary_column_wrapper<ValueT> vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // key 1 -> values {0, 3, 6}    : 0 + 9 + 36       = 45
  // key 2 -> values {1, 4, 5, 9} : 1 + 16 + 25 + 81 = 123
  // key 3 -> values {2, 7, 8}    : 4 + 49 + 64      = 117
  fixed_width_column_wrapper<KeyT>    expect_keys({1, 2, 3});
  fixed_width_column_wrapper<ResultT> expect_vals({45., 123., 117.}, all_valid());
  // clang-format on

  auto agg = cudf::make_sum_of_squares_aggregation();
  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

} // namespace test
} // namespace cudf