Fix fallback to sort aggregation for grouping only hash aggregate (#9891) (#9898)

The following fixes what looks like an unintended fallback to the sort aggregation, introduced in #9545, for the grouping-only case (no aggregation requests).

In that PR, `std::all_of` is used to determine whether all of the aggregation requests are for struct types. However, when there are no aggregation requests at all, `std::all_of` returns true vacuously, causing a fallback to the sort aggregation (relevant code: https://github.com/rapidsai/cudf/pull/9545/files#diff-e409f72ddc11ad10fa0099e21b409b92f12bfac8ba1817266696c34a620aa081R645-R650).
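
As a minimal, standalone illustration of the trap (plain C++; the `requests` vector here is a hypothetical stand-in, not the cudf type):

```
#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
  std::vector<int> requests;  // grouping-only: zero aggregation requests

  // std::all_of is vacuously true on an empty range regardless of the
  // predicate, so the old `has_struct` check reported "every request is a
  // struct" and forced the sort-based fallback even with no structs around.
  assert(std::all_of(requests.begin(), requests.end(), [](int) { return false; }));
  return 0;
}
```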

I added a benchmark, `group_no_requests_benchmark.cu`, by mostly copying `group_sum_benchmark.cu`, but I changed one critical part: the `groupby` object is re-created on every iteration of `state`:

```
  for (auto _ : state) {
    cuda_event_timer timer(state, true);
    cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
    auto result = gb_obj.aggregate(requests);
  }
```

This shows what happens when the `groupby` instance is created each time an aggregate is issued, which re-creates the `helper` each time in the sorted case.

If the `groupby` object is not re-created on every iteration, the performance difference between the before/after cases is negligible; however, we never recycle a `groupby` instance when using the groupby API from Spark.
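
For contrast, a minimal sketch (same benchmark scaffolding as above; not part of this PR) of that reuse scenario, where construction is hoisted out of the loop so the sort path's `helper` is built only once:

```
  // Reuse variant: one groupby instance across all iterations.
  cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
  for (auto _ : state) {
    cuda_event_timer timer(state, true);
    auto result = gb_obj.aggregate(requests);
  }
```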

Posting this as a draft for feedback, as I am not sure I handled the benchmark part correctly.

This was executed on a T4 GPU.

Before the patch:

```
Groupby/BasicNoRequest/10000/manual_time               0.158 ms        0.184 ms         4420
Groupby/BasicNoRequest/1000000/manual_time              1.72 ms         1.74 ms          408
Groupby/BasicNoRequest/10000000/manual_time             18.9 ms         18.9 ms           37
Groupby/BasicNoRequest/100000000/manual_time             198 ms          198 ms            3
```

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Nghia Truong (https://github.com/ttnghia)
  - Conor Hoekstra (https://github.com/codereport)

URL: #9891
abellina authored Dec 16, 2021
1 parent 0930f71 commit fc5661f
Showing 3 changed files with 131 additions and 19 deletions.
8 changes: 6 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
```
@@ -156,8 +156,12 @@ ConfigureBench(FILL_BENCH filling/repeat_benchmark.cpp)
 # ##################################################################################################
 # * groupby benchmark -----------------------------------------------------------------------------
 ConfigureBench(
-  GROUPBY_BENCH groupby/group_sum_benchmark.cu groupby/group_nth_benchmark.cu
-  groupby/group_shift_benchmark.cu groupby/group_struct_benchmark.cu
+  GROUPBY_BENCH
+  groupby/group_sum_benchmark.cu
+  groupby/group_nth_benchmark.cu
+  groupby/group_shift_benchmark.cu
+  groupby/group_struct_benchmark.cu
+  groupby/group_no_requests_benchmark.cu
 )

 # ##################################################################################################
```
115 changes: 115 additions & 0 deletions cpp/benchmarks/groupby/group_no_requests_benchmark.cu
@@ -0,0 +1,115 @@
```
/*
 * Copyright (c) 2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cudf/copying.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/groupby.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <memory>
#include <random>

class Groupby : public cudf::benchmark {
};

// TODO: put it in a struct so `uniform` can be remade with different min, max
template <typename T>
T random_int(T min, T max)
{
  static unsigned seed = 13377331;
  static std::mt19937 engine{seed};
  static std::uniform_int_distribution<T> uniform{min, max};

  return uniform(engine);
}

void BM_basic_no_requests(benchmark::State& state)
{
  using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;

  const cudf::size_type column_size{(cudf::size_type)state.range(0)};

  auto data_it = cudf::detail::make_counting_transform_iterator(
    0, [=](cudf::size_type row) { return random_int(0, 100); });

  wrapper keys(data_it, data_it + column_size);
  wrapper vals(data_it, data_it + column_size);

  std::vector<cudf::groupby::aggregation_request> requests;

  for (auto _ : state) {
    cuda_event_timer timer(state, true);
    cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
    auto result = gb_obj.aggregate(requests);
  }
}

BENCHMARK_DEFINE_F(Groupby, BasicNoRequest)(::benchmark::State& state)
{
  BM_basic_no_requests(state);
}

BENCHMARK_REGISTER_F(Groupby, BasicNoRequest)
  ->UseManualTime()
  ->Unit(benchmark::kMillisecond)
  ->Arg(10000)
  ->Arg(1000000)
  ->Arg(10000000)
  ->Arg(100000000);

void BM_pre_sorted_no_requests(benchmark::State& state)
{
  using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;

  const cudf::size_type column_size{(cudf::size_type)state.range(0)};

  auto data_it = cudf::detail::make_counting_transform_iterator(
    0, [=](cudf::size_type row) { return random_int(0, 100); });
  auto valid_it = cudf::detail::make_counting_transform_iterator(
    0, [=](cudf::size_type row) { return random_int(0, 100) < 90; });

  wrapper keys(data_it, data_it + column_size);
  wrapper vals(data_it, data_it + column_size, valid_it);

  auto keys_table  = cudf::table_view({keys});
  auto sort_order  = cudf::sorted_order(keys_table);
  auto sorted_keys = cudf::gather(keys_table, *sort_order);
  // No need to sort values using sort_order because they were generated randomly

  std::vector<cudf::groupby::aggregation_request> requests;

  for (auto _ : state) {
    cuda_event_timer timer(state, true);
    cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES);
    auto result = gb_obj.aggregate(requests);
  }
}

BENCHMARK_DEFINE_F(Groupby, PreSortedNoRequests)(::benchmark::State& state)
{
  BM_pre_sorted_no_requests(state);
}

BENCHMARK_REGISTER_F(Groupby, PreSortedNoRequests)
  ->UseManualTime()
  ->Unit(benchmark::kMillisecond)
  ->Arg(1000000)
  ->Arg(10000000)
  ->Arg(100000000);
```
27 changes: 10 additions & 17 deletions cpp/src/groupby/hash/groupby.cu
```
@@ -635,23 +635,16 @@ std::unique_ptr<table> groupby_null_templated(table_view const& keys,
  */
 bool can_use_hash_groupby(table_view const& keys, host_span<aggregation_request const> requests)
 {
-  auto const all_hash_aggregations =
-    std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) {
-      return cudf::has_atomic_support(r.values.type()) and
-             std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) {
-               return is_hash_aggregation(a->kind);
-             });
-    });
-
-  // Currently, structs are not supported in any of hash-based aggregations.
-  // Therefore, if any request contains structs then we must fallback to sort-based aggregations.
-  // TODO: Support structs in hash-based aggregations.
-  auto const has_struct =
-    std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) {
-      return r.values.type().id() == type_id::STRUCT;
-    });
-
-  return all_hash_aggregations && !has_struct;
+  return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) {
+    // Currently, structs are not supported in any of hash-based aggregations.
+    // Therefore, if any request contains structs then we must fallback to sort-based aggregations.
+    // TODO: Support structs in hash-based aggregations.
+    return not(r.values.type().id() == type_id::STRUCT) and
+           cudf::has_atomic_support(r.values.type()) and
+           std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) {
+             return is_hash_aggregation(a->kind);
+           });
+  });
 }

 // Hash-based groupby
```
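
To make the fixed behavior concrete, here is a self-contained sketch of the predicate; the `type_id` enum, `aggregation_request` struct, and `has_atomic_support` stub below are simplified stand-ins for the cudf definitions, not the real API:

```
#include <algorithm>
#include <cassert>
#include <vector>

enum class type_id { INT64, STRUCT };
struct aggregation_request { type_id type; };

// Stand-in for cudf::has_atomic_support.
bool has_atomic_support(type_id t) { return t != type_id::STRUCT; }

bool can_use_hash_groupby(std::vector<aggregation_request> const& requests)
{
  // Vacuously true when `requests` is empty, so a grouping-only
  // aggregate now stays on the hash path.
  return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) {
    return r.type != type_id::STRUCT and has_atomic_support(r.type);
  });
}

int main()
{
  assert(can_use_hash_groupby({}));                       // no requests: hash
  assert(can_use_hash_groupby({{type_id::INT64}}));       // supported type: hash
  assert(not can_use_hash_groupby({{type_id::STRUCT}}));  // struct: sort fallback
  return 0;
}
```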
