From cb4adb40eb6a30d55070e5ccfc9bec95873c594b Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 7 Jan 2022 15:15:05 -0500 Subject: [PATCH] Add sum scan benchmark --- cpp/benchmarks/CMakeLists.txt | 1 + .../groupby/group_scan_benchmark.cu | 119 ++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 cpp/benchmarks/groupby/group_scan_benchmark.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 63f6857ee08..370f84fc14a 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -177,6 +177,7 @@ ConfigureBench( groupby/group_shift_benchmark.cu groupby/group_struct_benchmark.cu groupby/group_no_requests_benchmark.cu + groupby/group_scan_benchmark.cu ) # ################################################################################################## diff --git a/cpp/benchmarks/groupby/group_scan_benchmark.cu b/cpp/benchmarks/groupby/group_scan_benchmark.cu new file mode 100644 index 00000000000..b22b8ea3f14 --- /dev/null +++ b/cpp/benchmarks/groupby/group_scan_benchmark.cu @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + /* NOTE(review): every #include that follows has lost its header name in this copy of the patch, and template argument lists further on look stripped as well; recover them from the original commit before applying. */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + /* Fixture class named in the BENCHMARK_DEFINE_F and BENCHMARK_REGISTER_F calls of this file; it derives from cudf::benchmark and declares no members of its own. */ +class Groupby : public cudf::benchmark { +}; + /* Draws one value from a uniform integer distribution built from (min, max), driven by a std::mt19937 engine seeded with 13377331. seed, engine and uniform are function-local statics, so they are initialized by the first call only: the engine state carries over between calls, and the min and max of later calls to the same instantiation are ignored. Every caller in this file passes (0, 100), so that is not observable here. @param min lower bound given to the distribution. @param max upper bound given to the distribution. @return the next value produced by uniform(engine). */ +template +T random_int(T min, T max) +{ + static unsigned seed = 13377331; + static std::mt19937 engine{seed}; + static std::uniform_int_distribution uniform{min, max}; + + return uniform(engine); +} + /* Benchmark body for a groupby sum scan on keys in generated order. column_size is taken from state.range(0); keys and vals are built from an iterator that calls random_int(0, 100) per element; the groupby object is created over a table_view listing the keys column three times; a single scan_request carries vals and one sum aggregation. All of that setup runs once, before the loop; each benchmark iteration creates a cuda_event_timer and calls gb_obj.scan(requests). NOTE(review): the meaning of the timer's second argument (true) is not visible here, confirm against cuda_event_timer. @param state Google Benchmark state; range(0) supplies the row count. */ +void BM_basic_sum_scan(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size); + + cudf::groupby::groupby gb_obj(cudf::table_view({keys, keys, keys})); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals; + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.scan(requests); + } +} + /* Defines the BasicSumScan case on the Groupby fixture; it only forwards to BM_basic_sum_scan. */ +BENCHMARK_DEFINE_F(Groupby, BasicSumScan)(::benchmark::State& state) { BM_basic_sum_scan(state); } + /* Registers BasicSumScan with manual timing, millisecond units, and row counts of 1000000, 10000000 and 100000000. */ +BENCHMARK_REGISTER_F(Groupby, BasicSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); + /* Benchmark body for a groupby sum scan on keys that are sorted ahead of time. Compared with BM_basic_sum_scan: vals gets a validity iterator that is true when random_int(0, 100) is below 90; there is one key column instead of three, and it is ordered with cudf::sorted_order followed by cudf::gather; the groupby object is built from the gathered keys with null_policy::EXCLUDE and sorted::YES. The sort and gather run before the loop, so each iteration again covers only the cuda_event_timer and gb_obj.scan(requests). @param state Google Benchmark state; range(0) supplies the row count. */ +void BM_pre_sorted_sum_scan(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); + auto valid_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100) < 90; }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size, valid_it); + + auto keys_table = cudf::table_view({keys}); 
+ auto sort_order = cudf::sorted_order(keys_table); + auto sorted_keys = cudf::gather(keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + /* NOTE(review): sorted::YES presumably tells groupby that the keys arrive already ordered; confirm against the cudf::groupby::groupby constructor documentation. */ + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals; + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.scan(requests); + } +} + /* Defines the PreSortedSumScan case on the Groupby fixture; it only forwards to BM_pre_sorted_sum_scan. */ +BENCHMARK_DEFINE_F(Groupby, PreSortedSumScan)(::benchmark::State& state) +{ + BM_pre_sorted_sum_scan(state); +} + /* Registers PreSortedSumScan with the same settings as BasicSumScan: manual timing, millisecond units, and row counts of 1000000, 10000000 and 100000000. */ +BENCHMARK_REGISTER_F(Groupby, PreSortedSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000);