From fc5661fd1336d9809a8d396cd651fc3561bb5f3e Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Thu, 16 Dec 2021 12:44:38 -0600 Subject: [PATCH] Fix fallback to sort aggregation for grouping only hash aggregate (#9891) (#9898) The following fixes what looks like an unintended fallback to sort aggregate introduced in https://github.com/rapidsai/cudf/pull/9545 for a grouping only (no aggregation request) case. In the PR, the `std::all_of` function is used to determine whether the aggregation requests would be for struct types. That said, when there are no aggregation requests the `std::all_of` function will return true, causing a fallback to the sort aggregation (relevant code: https://github.com/rapidsai/cudf/pull/9545/files#diff-e409f72ddc11ad10fa0099e21b409b92f12bfac8ba1817266696c34a620aa081R645-R650). I added a benchmark `group_no_requests_benchmark.cu` by mostly copying `group_sum_benchmark.cu` but I changed one critical part. I am re-creating the `groupby` object for each `state`: ``` for (auto _ : state) { cuda_event_timer timer(state, true); cudf::groupby::groupby gb_obj(cudf::table_view({keys})); auto result = gb_obj.aggregate(requests); } ``` This shows what would happen in the scenario where the `groupby` instance is created each time an aggregate is issued, which would re-create the `helper` each time for the sorted case. If the `groupby` object is not recreated each time, the difference in performance between the before/after cases is negligible. We never recycle a `groupby` instance when using the groupby API from Spark. Posting this as draft for feedback as I am not sure if I handled the benchmark part correctly. This was executed on a T4 GPU. 
Before the patch: ``` Groupby/BasicNoRequest/10000/manual_time 0.158 ms 0.184 ms 4420 Groupby/BasicNoRequest/1000000/manual_time 1.72 ms 1.74 ms 408 Groupby/BasicNoRequest/10000000/manual_time 18.9 ms 18.9 ms 37 Groupby/BasicNoRequest/100000000/manual_time 198 ms 198 ms 3 ``` Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Jake Hemstad (https://github.com/jrhemstad) - Nghia Truong (https://github.com/ttnghia) - Conor Hoekstra (https://github.com/codereport) URL: https://github.com/rapidsai/cudf/pull/9891 --- cpp/benchmarks/CMakeLists.txt | 8 +- .../groupby/group_no_requests_benchmark.cu | 115 ++++++++++++++++++ cpp/src/groupby/hash/groupby.cu | 27 ++-- 3 files changed, 131 insertions(+), 19 deletions(-) create mode 100644 cpp/benchmarks/groupby/group_no_requests_benchmark.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 72b247ae748..34a5aebff6f 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -156,8 +156,12 @@ ConfigureBench(FILL_BENCH filling/repeat_benchmark.cpp) # ################################################################################################## # * groupby benchmark ----------------------------------------------------------------------------- ConfigureBench( - GROUPBY_BENCH groupby/group_sum_benchmark.cu groupby/group_nth_benchmark.cu - groupby/group_shift_benchmark.cu groupby/group_struct_benchmark.cu + GROUPBY_BENCH + groupby/group_sum_benchmark.cu + groupby/group_nth_benchmark.cu + groupby/group_shift_benchmark.cu + groupby/group_struct_benchmark.cu + groupby/group_no_requests_benchmark.cu ) # ################################################################################################## diff --git a/cpp/benchmarks/groupby/group_no_requests_benchmark.cu b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu new file mode 100644 index 00000000000..7dbe1888cee --- /dev/null +++ 
b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +class Groupby : public cudf::benchmark { +}; + +// TODO: put it in a struct so `uniform` can be remade with different min, max +template +T random_int(T min, T max) +{ + static unsigned seed = 13377331; + static std::mt19937 engine{seed}; + static std::uniform_int_distribution uniform{min, max}; + + return uniform(engine); +} + +void BM_basic_no_requests(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size); + + std::vector requests; + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::groupby::groupby gb_obj(cudf::table_view({keys})); + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, BasicNoRequest)(::benchmark::State& state) +{ + BM_basic_no_requests(state); +} + +BENCHMARK_REGISTER_F(Groupby, BasicNoRequest) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(10000) + ->Arg(1000000) + 
->Arg(10000000) + ->Arg(100000000); + +void BM_pre_sorted_no_requests(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); + auto valid_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100) < 90; }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size, valid_it); + + auto keys_table = cudf::table_view({keys}); + auto sort_order = cudf::sorted_order(keys_table); + auto sorted_keys = cudf::gather(keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + std::vector requests; + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, PreSortedNoRequests)(::benchmark::State& state) +{ + BM_pre_sorted_no_requests(state); +} + +BENCHMARK_REGISTER_F(Groupby, PreSortedNoRequests) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index e35fa36a289..950cc9727fd 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -635,23 +635,16 @@ std::unique_ptr groupby_null_templated(table_view const& keys, */ bool can_use_hash_groupby(table_view const& keys, host_span requests) { - auto const all_hash_aggregations = - std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return cudf::has_atomic_support(r.values.type()) and - std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { - return 
is_hash_aggregation(a->kind); - }); - }); - - // Currently, structs are not supported in any of hash-based aggregations. - // Therefore, if any request contains structs then we must fallback to sort-based aggregations. - // TODO: Support structs in hash-based aggregations. - auto const has_struct = - std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return r.values.type().id() == type_id::STRUCT; - }); - - return all_hash_aggregations && !has_struct; + return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { + // Currently, structs are not supported in any of hash-based aggregations. + // Therefore, if any request contains structs then we must fallback to sort-based aggregations. + // TODO: Support structs in hash-based aggregations. + return not(r.values.type().id() == type_id::STRUCT) and + cudf::has_atomic_support(r.values.type()) and + std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { + return is_hash_aggregation(a->kind); + }); + }); } // Hash-based groupby