diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 63f6857ee08..370f84fc14a 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -177,6 +177,7 @@ ConfigureBench(
   groupby/group_shift_benchmark.cu
   groupby/group_struct_benchmark.cu
   groupby/group_no_requests_benchmark.cu
+  groupby/group_scan_benchmark.cu
 )
 
 # ##################################################################################################
diff --git a/cpp/benchmarks/groupby/group_benchmark_common.hpp b/cpp/benchmarks/groupby/group_benchmark_common.hpp
new file mode 100644
index 00000000000..fba5bc28822
--- /dev/null
+++ b/cpp/benchmarks/groupby/group_benchmark_common.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <random>
+
+/**
+ * @brief Return a pseudo-random integer drawn uniformly from the closed range [min, max].
+ *
+ * The engine is a function-local static seeded with a fixed constant, so a given
+ * sequence of calls is reproducible from run to run. The distribution is built
+ * from `min` and `max` on every call, so successive calls may use different ranges.
+ *
+ * @tparam T Integer type of the bounds and of the returned value
+ * @param min Lower bound of the range (inclusive)
+ * @param max Upper bound of the range (inclusive)
+ * @return A value in [min, max]
+ */
+template <typename T>
+T random_int(T min, T max)
+{
+  static unsigned seed = 13377331;
+  static std::mt19937 engine{seed};
+
+  // A static distribution would be constructed once, from the first call's
+  // arguments, and would silently ignore the bounds passed to later calls.
+  std::uniform_int_distribution<T> uniform{min, max};
+
+  return uniform(engine);
+}
diff --git a/cpp/benchmarks/groupby/group_no_requests_benchmark.cu b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
index 7dbe1888cee..209155862bd 100644
--- a/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,32 +14,23 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include #include #include + #include -#include -#include #include -#include class Groupby : public cudf::benchmark { }; -// TODO: put it in a struct so `uniform` can be remade with different min, max -template -T random_int(T min, T max) -{ - static unsigned seed = 13377331; - static std::mt19937 engine{seed}; - static std::uniform_int_distribution uniform{min, max}; - - return uniform(engine); -} - void BM_basic_no_requests(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/benchmarks/groupby/group_nth_benchmark.cu b/cpp/benchmarks/groupby/group_nth_benchmark.cu index 8d1de36db95..107b3839c4c 100644 --- a/cpp/benchmarks/groupby/group_nth_benchmark.cu +++ b/cpp/benchmarks/groupby/group_nth_benchmark.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,32 +14,23 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include #include + #include -#include -#include #include -#include class Groupby : public cudf::benchmark { }; -// TODO: put it in a struct so `uniform` can be remade with different min, max -template -T random_int(T min, T max) -{ - static unsigned seed = 13377331; - static std::mt19937 engine{seed}; - static std::uniform_int_distribution uniform{min, max}; - - return uniform(engine); -} - void BM_pre_sorted_nth(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/benchmarks/groupby/group_scan_benchmark.cu b/cpp/benchmarks/groupby/group_scan_benchmark.cu new file mode 100644 index 00000000000..d9849e53498 --- /dev/null +++ b/cpp/benchmarks/groupby/group_scan_benchmark.cu @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +class Groupby : public cudf::benchmark { +}; + /* Times a groupby SUM scan on unsorted keys: state.range(0) rows of keys and values are produced by random_int(0, 100), the same key column is passed three times as the grouping table, and each iteration times one gb_obj.scan(requests) call under a cuda_event_timer. */ +void BM_basic_sum_scan(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); /* `row` is ignored: each invocation of the lambda is a fresh random draw */ + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size); + + cudf::groupby::groupby gb_obj(cudf::table_view({keys, keys, keys})); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals; + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); /* NOTE(review): the second argument presumably requests a cache flush before timing; confirm against cuda_event_timer */ + + auto result = gb_obj.scan(requests); + } +} + /* Registers the fixture benchmark with manually reported time, in milliseconds, for 1e6, 1e7 and 1e8 rows. */ +BENCHMARK_DEFINE_F(Groupby, BasicSumScan)(::benchmark::State& state) { BM_basic_sum_scan(state); } + +BENCHMARK_REGISTER_F(Groupby, BasicSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); + /* Times a SUM scan on a single key column that is sorted before the timed loop (cudf::sorted_order followed by cudf::gather) and handed to groupby with null_policy::EXCLUDE and sorted::YES; the values carry a validity iterator that marks an element valid when random_int(0, 100) is below 90. */ +void BM_pre_sorted_sum_scan(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100); }); + auto valid_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return random_int(0, 100) < 90; }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size, valid_it); + + auto keys_table = cudf::table_view({keys}); + auto sort_order = cudf::sorted_order(keys_table); + auto sorted_keys = cudf::gather(keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + 
cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals; + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.scan(requests); + } +} + /* Same registration as BasicSumScan: manual time, milliseconds, 1e6 / 1e7 / 1e8 rows. */ +BENCHMARK_DEFINE_F(Groupby, PreSortedSumScan)(::benchmark::State& state) +{ + BM_pre_sorted_sum_scan(state); +} + +BENCHMARK_REGISTER_F(Groupby, PreSortedSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); diff --git a/cpp/benchmarks/groupby/group_shift_benchmark.cu b/cpp/benchmarks/groupby/group_shift_benchmark.cu index 81afcdd80e1..6b0710f4044 100644 --- a/cpp/benchmarks/groupby/group_shift_benchmark.cu +++ b/cpp/benchmarks/groupby/group_shift_benchmark.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include +#include #include #include @@ -24,24 +26,9 @@ #include -#include - -#include - class Groupby : public cudf::benchmark { }; -// TODO: put it in a struct so `uniform` can be remade with different min, max -template -T random_int(T min, T max) -{ - static unsigned seed = 13377331; - static std::mt19937 engine{seed}; - static std::uniform_int_distribution uniform{min, max}; - - return uniform(engine); -} - void BM_group_shift(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/benchmarks/groupby/group_sum_benchmark.cu b/cpp/benchmarks/groupby/group_sum_benchmark.cu index 0e9f5061a1a..63f9aa02070 100644 --- a/cpp/benchmarks/groupby/group_sum_benchmark.cu +++ b/cpp/benchmarks/groupby/group_sum_benchmark.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,32 +14,23 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include #include + #include -#include -#include #include -#include class Groupby : public cudf::benchmark { }; -// TODO: put it in a struct so `uniform` can be remade with different min, max -template -T random_int(T min, T max) -{ - static unsigned seed = 13377331; - static std::mt19937 engine{seed}; - static std::uniform_int_distribution uniform{min, max}; - - return uniform(engine); -} - void BM_basic_sum(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index cbe5f08639a..05330a7c492 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -40,12 +40,23 @@ struct store_result_functor { sort::sort_groupby_helper& helper, cudf::detail::result_cache& cache, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : helper(helper), cache(cache), values(values), stream(stream), mr(mr) + rmm::mr::device_memory_resource* mr, + sorted keys_are_sorted = sorted::NO) /* defaulted to sorted::NO, so callers that omit the flag take the existing grouping path */ + : helper(helper), + cache(cache), + values(values), + stream(stream), + mr(mr), + keys_are_sorted(keys_are_sorted) { } protected: + /** + * @brief Check if the groupby keys are presorted + */ + bool is_presorted() const { return keys_are_sorted == sorted::YES; } + /** * @brief Get the grouped values * @@ -54,6 +65,8 @@ struct store_result_functor { */ column_view get_grouped_values() { + if (is_presorted()) { return values; } /* presorted keys: return the input values as-is, without consulting the helper or the memoised grouped column */ + // TODO (dm): After implementing single pass multi-agg, explore making a // cache of all grouped value columns rather than one at a time if (grouped_values) @@ -74,6 +87,7 @@ struct store_result_functor { */ column_view get_sorted_values() { + if (is_presorted()) { return values; } /* same shortcut: no call to helper.sorted_values when the keys are presorted */ return sorted_values ? 
sorted_values->view() : (sorted_values = helper.sorted_values(values, stream))->view(); }; @@ -86,6 +100,7 @@ struct store_result_functor { rmm::cuda_stream_view stream; ///< CUDA stream on which to execute kernels rmm::mr::device_memory_resource* mr; ///< Memory resource to allocate space for results + sorted keys_are_sorted; ///< Whether the keys are sorted std::unique_ptr sorted_values; ///< Memoised grouped and sorted values std::unique_ptr grouped_values; ///< Memoised grouped values }; diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 6ac416c1a30..402ff8c47ed 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -55,6 +55,9 @@ struct scan_result_functor final : store_result_functor { private: column_view get_grouped_values() { + // early exit if presorted + if (is_presorted()) { return values; } + // TODO (dm): After implementing single pass multi-agg, explore making a // cache of all grouped value columns rather than one at a time if (grouped_values) @@ -155,7 +158,8 @@ std::pair, std::vector> groupby::sort cudf::detail::result_cache cache(requests.size()); for (auto const& request : requests) { - auto store_functor = detail::scan_result_functor(request.values, helper(), cache, stream, mr); + auto store_functor = + detail::scan_result_functor(request.values, helper(), cache, stream, mr, _keys_are_sorted); /* forwards the groupby's sortedness flag so the functor's is_presorted() shortcut can apply */ for (auto const& aggregation : request.aggregations) { // TODO (dm): single pass compute all supported reductions cudf::detail::aggregation_dispatcher(aggregation->kind, store_functor, *aggregation); diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index 196aeed0430..19935dd4c91 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -59,6 +59,24 @@ TYPED_TEST(groupby_max_scan_test, basic) test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } +TYPED_TEST(groupby_max_scan_test, pre_sorted) /* keys are supplied already grouped in ascending order and the scan is requested with sorted::YES; expected values are the running maximum within each key group, in input order */ +{ + using 
value_wrapper = typename TestFixture::value_wrapper; + using result_wrapper = typename TestFixture::result_wrapper; + + // clang-format off + key_wrapper keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + value_wrapper vals({5, 8, 1, 6, 9, 0, 4, 7, 2, 3}); + + key_wrapper expect_keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + result_wrapper expect_vals({5, 8, 8, 6, 9, 9, 9, 7, 7, 7}); /* running max per group: {5,8,8} {6,9,9,9} {7,7,7} */ + // clang-format on + + auto agg = cudf::make_max_aggregation(); + test_single_scan( + keys, vals, expect_keys, expect_vals, std::move(agg), null_policy::EXCLUDE, sorted::YES); +} + TYPED_TEST(groupby_max_scan_test, empty_cols) { using value_wrapper = typename TestFixture::value_wrapper; diff --git a/cpp/tests/groupby/min_scan_tests.cpp b/cpp/tests/groupby/min_scan_tests.cpp index e4c018a9ce1..c672209c7b0 100644 --- a/cpp/tests/groupby/min_scan_tests.cpp +++ b/cpp/tests/groupby/min_scan_tests.cpp @@ -57,6 +57,24 @@ TYPED_TEST(groupby_min_scan_test, basic) test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } +TYPED_TEST(groupby_min_scan_test, pre_sorted) /* same presorted keys and values as the max case, scanned with sorted::YES; expected values are the running minimum within each key group */ +{ + using value_wrapper = typename TestFixture::value_wrapper; + using result_wrapper = typename TestFixture::result_wrapper; + + // clang-format off + key_wrapper keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + value_wrapper vals({5, 8, 1, 6, 9, 0, 4, 7, 2, 3}); + + key_wrapper expect_keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + result_wrapper expect_vals({5, 5, 1, 6, 6, 0, 0, 7, 2, 2}); /* running min per group: {5,5,1} {6,6,0,0} {7,2,2} */ + // clang-format on + + auto agg = cudf::make_min_aggregation(); + test_single_scan( + keys, vals, expect_keys, expect_vals, std::move(agg), null_policy::EXCLUDE, sorted::YES); +} + TYPED_TEST(groupby_min_scan_test, empty_cols) { using value_wrapper = typename TestFixture::value_wrapper; diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index 3117f8b1557..f4ac3a94d19 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -61,6 +61,24 @@ TYPED_TEST(groupby_sum_scan_test, basic) 
test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } +TYPED_TEST(groupby_sum_scan_test, pre_sorted) /* presorted keys scanned with sorted::YES; expected values are the running sum within each key group, in input order */ +{ + using value_wrapper = typename TestFixture::value_wrapper; + using result_wrapper = typename TestFixture::result_wrapper; + + // clang-format off + key_wrapper keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + value_wrapper vals{0, 3, 6, 1, 4, 5, 9, 2, 7, 8}; + + key_wrapper expect_keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + result_wrapper expect_vals{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}; /* running sum per group: {0,3,9} {1,5,10,19} {2,9,17} */ + // clang-format on + + auto agg = cudf::make_sum_aggregation(); + test_single_scan( + keys, vals, expect_keys, expect_vals, std::move(agg), null_policy::EXCLUDE, sorted::YES); +} + TYPED_TEST(groupby_sum_scan_test, empty_cols) { using value_wrapper = typename TestFixture::value_wrapper;