Skip to content

Commit

Permalink
update sort groupby to create base type column for dictionary type
Browse files Browse the repository at this point in the history
column
  • Loading branch information
karthikeyann committed Aug 25, 2021
1 parent e6257c9 commit aea6886
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
11 changes: 9 additions & 2 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <cudf/detail/gather.hpp>
#include <cudf/detail/groupby/sort_helper.hpp>
#include <cudf/detail/unary.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/lists/detail/drop_list_duplicates.hpp>
#include <cudf/table/table.hpp>
Expand Down Expand Up @@ -146,7 +147,10 @@ void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& a
if (cache.has_result(col_idx, agg)) return;

auto result = [&]() {
if (cudf::is_fixed_width(values.type())) {
auto values_type = cudf::is_dictionary(values.type())
? dictionary_column_view(values).keys().type()
: values.type();
if (cudf::is_fixed_width(values_type)) {
return detail::group_min(
get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr);
} else {
Expand Down Expand Up @@ -183,7 +187,10 @@ void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& a
if (cache.has_result(col_idx, agg)) return;

auto result = [&]() {
if (cudf::is_fixed_width(values.type())) {
auto values_type = cudf::is_dictionary(values.type())
? dictionary_column_view(values).keys().type()
: values.type();
if (cudf::is_fixed_width(values_type)) {
return detail::group_max(
get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr);
} else {
Expand Down
16 changes: 16 additions & 0 deletions cpp/tests/groupby/max_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ TEST_F(groupby_dictionary_max_test, basic)
force_use_sort_impl::YES);
}

TEST_F(groupby_dictionary_max_test, fixed_width)
{
using V = int64_t;

// clang-format off
fixed_width_column_wrapper<K> keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2 };
dictionary_column_wrapper<V> vals{ 0xABC, 0xBBB, 0xF1, 0xAAA, 0xFFF, 0xBAA, 0xAAA, 0x01, 0xF1, 0xEEE};
fixed_width_column_wrapper<K> expect_keys { 1, 2, 3 };
fixed_width_column_wrapper<V> expect_vals_w({ 0xABC, 0xFFF, 0xF1 });
// clang-format on

test_single_agg(keys, vals, expect_keys, expect_vals_w, cudf::make_max_aggregation());
test_single_agg(
keys, vals, expect_keys, expect_vals_w, cudf::make_max_aggregation(), force_use_sort_impl::YES);
}

template <typename T>
struct FixedPointTestBothReps : public cudf::test::BaseFixture {
};
Expand Down
16 changes: 16 additions & 0 deletions cpp/tests/groupby/min_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ TEST_F(groupby_dictionary_min_test, basic)
force_use_sort_impl::YES);
}

TEST_F(groupby_dictionary_min_test, fixed_width)
{
using V = int64_t;

// clang-format off
fixed_width_column_wrapper<K> keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2 };
dictionary_column_wrapper<V> vals{ 0xABC, 0xBBB, 0xF1, 0xAAA, 0xFFF, 0xBAA, 0xAAA, 0x01, 0xF1, 0xEEE};
fixed_width_column_wrapper<K> expect_keys { 1, 2, 3 };
fixed_width_column_wrapper<V> expect_vals_w({ 0xAAA, 0xBAA, 0x01 });
// clang-format on

test_single_agg(keys, vals, expect_keys, expect_vals_w, cudf::make_min_aggregation());
test_single_agg(
keys, vals, expect_keys, expect_vals_w, cudf::make_min_aggregation(), force_use_sort_impl::YES);
}

template <typename T>
struct FixedPointTestBothReps : public cudf::test::BaseFixture {
};
Expand Down

0 comments on commit aea6886

Please sign in to comment.