From 846075addf3818efbe96b20810f6b9f70b9b02b3 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 4 Mar 2021 15:52:11 -0500 Subject: [PATCH 1/3] Reduce compile time/size for scan.cu --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/reduction/scan_benchmark.cpp | 63 +++++++++++++++++ cpp/src/reductions/scan.cu | 75 ++++++++++++--------- 3 files changed, 106 insertions(+), 33 deletions(-) create mode 100644 cpp/benchmarks/reduction/scan_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5db32987624..52da1c0e669 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -115,6 +115,7 @@ ConfigureBench(REDUCTION_BENCH reduction/anyall_benchmark.cpp reduction/dictionary_benchmark.cpp reduction/reduce_benchmark.cpp + reduction/scan_benchmark.cpp reduction/minmax_benchmark.cpp) ################################################################################################### diff --git a/cpp/benchmarks/reduction/scan_benchmark.cpp b/cpp/benchmarks/reduction/scan_benchmark.cpp new file mode 100644 index 00000000000..b2d8fcfc004 --- /dev/null +++ b/cpp/benchmarks/reduction/scan_benchmark.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +class ReductionScan : public cudf::benchmark { +}; + +template +static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + if (!include_nulls) table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + cudf::column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + } +} + +#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReductionScan, name) \ + (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ + BENCHMARK_REGISTER_F(ReductionScan, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); +SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); +SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); +SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false); +SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true); +SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); +SCAN_BENCHMARK_DEFINE(double_nulls, double, true); diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu index f73ffb0214a..d159e742290 100644 --- a/cpp/src/reductions/scan.cu +++ b/cpp/src/reductions/scan.cu @@ -21,11 +21,10 @@ #include #include #include -#include +#include #include #include #include -#include #include #include @@ -34,6 +33,32 @@ namespace cudf { namespace detail { + +namespace { +/** + * @brief Accessor handles both nullable and non-nullable columns. + * + * @tparam Element type used for null-replacement value + */ +template +struct null_replace_accessor { + column_device_view const col; ///< column view of column in device + Element const null_replacement{}; ///< value returned when element is null + bool const has_nulls; ///< true if col has null elements + + null_replace_accessor(column_device_view const& _col, Element null_val, bool has_nulls) + : col{_col}, null_replacement{null_val}, has_nulls(has_nulls) + { + CUDF_EXPECTS(data_type(type_to_id()) == col.type(), "the data type mismatch"); + if (has_nulls) CUDF_EXPECTS(_col.nullable(), "column with nulls must have a validity bitmask"); + } + __device__ Element operator()(cudf::size_type i) const + { + return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element(i); + } +}; +} // namespace + /** * @brief Dispatcher for running Scan operation on input column * Dispatches scan operation on `Op` and creates output column @@ -73,23 +98,14 @@ struct scan_dispatcher { mutable_column_view output = output_column->mutable_view(); auto d_input = column_device_view::create(input_view, stream); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::exclusive_scan(rmm::exec_policy(stream), - input, - input + size, - output.data(), - Op::template identity(), - Op{}); - } else { - auto input = d_input->begin(); - thrust::exclusive_scan(rmm::exec_policy(stream), - input, - input + size, - output.data(), - Op::template identity(), - Op{}); - } + auto input = make_counting_transform_iterator( + 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + thrust::exclusive_scan(rmm::exec_policy(stream), + input, + input + size, + output.data(), + Op::template identity(), + Op{}); CHECK_CUDA(stream.value()); return output_column; @@ -147,13 +163,9 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); mutable_column_view output = output_column->mutable_view(); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); - } else { - auto input = d_input->begin(); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); - } + auto const input = make_counting_transform_iterator( + 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); CHECK_CUDA(stream.value()); return output_column; @@ -171,13 +183,10 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); - } else { - auto input = d_input->begin(); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); - } + auto input = make_counting_transform_iterator( + 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); + CHECK_CUDA(stream.value()); auto output_column = From 90b433080cb1b3f77eed33408949dc7cb556569d Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 5 Mar 2021 08:34:05 -0500 Subject: [PATCH 2/3] null_replace_accessor column type check conflicts with type_dispatcher --- cpp/src/reductions/scan.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu index d159e742290..c80d6e48cb4 100644 --- a/cpp/src/reductions/scan.cu +++ b/cpp/src/reductions/scan.cu @@ -49,7 +49,8 @@ struct null_replace_accessor { null_replace_accessor(column_device_view const& _col, Element null_val, bool has_nulls) : col{_col}, null_replacement{null_val}, has_nulls(has_nulls) { - CUDF_EXPECTS(data_type(type_to_id()) == col.type(), "the data type mismatch"); + CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), + "the data type mismatch"); if (has_nulls) CUDF_EXPECTS(_col.nullable(), "column with nulls must have a validity bitmask"); } __device__ Element operator()(cudf::size_type i) const From 25c6cf1cd4c6a3b108685489c64a90f404b47828 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 5 Mar 2021 13:02:56 -0500 Subject: [PATCH 3/3] merge null-replace-iterators logic --- cpp/include/cudf/detail/iterator.cuh | 48 +++++++++++++---------- cpp/src/reductions/scan.cu | 38 +++--------------- cpp/tests/iterator/value_iterator_test.cu | 4 +- cpp/tests/reductions/scan_tests.cpp | 29 +++++++++++--- 4 files changed, 60 insertions(+), 59 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 805cdc02bc6..881afa63ca5 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -71,16 +71,13 @@ inline auto make_counting_transform_iterator(cudf::size_type start, UnaryFunctio } /** - * @brief value accessor of column with null bitmask - * A unary functor returns scalar value at `id`. - * `operator() (cudf::size_type id)` computes `element` and valid flag at `id` - * This functor is only allowed for nullable columns. + * @brief Value accessor of column that may have a null bitmask. * - * the return value for element `i` will return `column[i]` - * if it is valid, or `null_replacement` if it is null. + * This unary functor returns scalar value at `id`. + * The `operator()(cudf::size_type id)` computes the `element` and valid flag at `id`. * - * @throws cudf::logic_error if the column is not nullable. - * @throws cudf::logic_error if column datatype and Element type mismatch. + * The return value for element `i` will return `column[i]` + * if it is valid, or `null_replacement` if it is null. * * @tparam Element The type of elements in the column */ @@ -88,24 +85,33 @@ template struct null_replaced_value_accessor { column_device_view const col; ///< column view of column in device Element const null_replacement{}; ///< value returned when element is null + bool const has_nulls; ///< true if col has null elements /** - * @brief constructor - * @param[in] _col column device view of cudf column + * @brief Creates an accessor for a null-replacement iterator. + * + * @throws cudf::logic_error if `col` type does not match Element type. + * @throws cudf::logic_error if `has_nulls` is true but `col` does not have a validity mask. + * + * @param[in] col column device view of cudf column * @param[in] null_replacement The value to return for null elements + * @param[in] has_nulls Must be set to true if `col` has nulls. */ - null_replaced_value_accessor(column_device_view const& _col, Element null_val) - : col{_col}, null_replacement{null_val} + null_replaced_value_accessor(column_device_view const& col, + Element null_val, + bool has_nulls = true) + : col{col}, null_replacement{null_val}, has_nulls{has_nulls} { - CUDF_EXPECTS(data_type(type_to_id()) == col.type(), "the data type mismatch"); - // verify valid is non-null, otherwise, is_valid_nocheck() will crash - CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); + CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), + "the data type mismatch"); + // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash + if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); } CUDA_DEVICE_CALLABLE Element operator()(cudf::size_type i) const { - return col.is_valid_nocheck(i) ? col.element(i) : null_replacement; + return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element(i); } }; @@ -140,7 +146,7 @@ struct validity_accessor { * * Dereferencing the returned iterator for element `i` will return `column[i]` * if it is valid, or `null_replacement` if it is null. - * This iterator is only allowed for nullable columns. + * This iterator is only allowed for both nullable and non-nullable columns. * * @throws cudf::logic_error if the column is not nullable. * @throws cudf::logic_error if column datatype and Element type mismatch. @@ -148,15 +154,17 @@ struct validity_accessor { * @tparam Element The type of elements in the column * @param column The column to iterate * @param null_replacement The value to return for null elements - * @return auto Iterator that returns valid column elements, or a null + * @param has_nulls Must be set to true if `column` has nulls. + * @return Iterator that returns valid column elements, or a null * replacement value for null elements. */ template auto make_null_replacement_iterator(column_device_view const& column, - Element const null_replacement = Element{0}) + Element const null_replacement = Element{0}, + bool has_nulls = true) { return make_counting_transform_iterator( - 0, null_replaced_value_accessor{column, null_replacement}); + 0, null_replaced_value_accessor{column, null_replacement, has_nulls}); } /** diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu index c80d6e48cb4..c3aadf47794 100644 --- a/cpp/src/reductions/scan.cu +++ b/cpp/src/reductions/scan.cu @@ -34,32 +34,6 @@ namespace cudf { namespace detail { -namespace { -/** - * @brief Accessor handles both nullable and non-nullable columns. - * - * @tparam Element type used for null-replacement value - */ -template -struct null_replace_accessor { - column_device_view const col; ///< column view of column in device - Element const null_replacement{}; ///< value returned when element is null - bool const has_nulls; ///< true if col has null elements - - null_replace_accessor(column_device_view const& _col, Element null_val, bool has_nulls) - : col{_col}, null_replacement{null_val}, has_nulls(has_nulls) - { - CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), - "the data type mismatch"); - if (has_nulls) CUDF_EXPECTS(_col.nullable(), "column with nulls must have a validity bitmask"); - } - __device__ Element operator()(cudf::size_type i) const - { - return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element(i); - } -}; -} // namespace - /** * @brief Dispatcher for running Scan operation on input column * Dispatches scan operation on `Op` and creates output column @@ -99,8 +73,8 @@ struct scan_dispatcher { mutable_column_view output = output_column->mutable_view(); auto d_input = column_device_view::create(input_view, stream); - auto input = make_counting_transform_iterator( - 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + auto input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); thrust::exclusive_scan(rmm::exec_policy(stream), input, input + size, @@ -164,8 +138,8 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); mutable_column_view output = output_column->mutable_view(); - auto const input = make_counting_transform_iterator( - 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + auto const input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); CHECK_CUDA(stream.value()); @@ -184,8 +158,8 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); - auto input = make_counting_transform_iterator( - 0, null_replace_accessor{*d_input, Op::template identity(), input_view.has_nulls()}); + auto input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); CHECK_CUDA(stream.value()); diff --git a/cpp/tests/iterator/value_iterator_test.cu b/cpp/tests/iterator/value_iterator_test.cu index 3ad7ac6d0cd..542123ffd25 100644 --- a/cpp/tests/iterator/value_iterator_test.cu +++ b/cpp/tests/iterator/value_iterator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -343,7 +343,7 @@ TYPED_TEST(IteratorTest, error_handling) CUDF_EXPECT_THROW_MESSAGE((cudf::detail::make_null_replacement_iterator( *d_col_no_null, cudf::test::make_type_param_scalar(0))), - "Unexpected non-nullable column."); + "column with nulls must have a validity bitmask"); CUDF_EXPECT_THROW_MESSAGE((d_col_no_null->pair_begin()), "Unexpected non-nullable column."); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 549e5e0d215..8372b3977c0 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -509,8 +509,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanSum) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{1, 3, 6, 10}, scale}; auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({1, 2, 3, 0, 4, 0}, {1, 1, 1, 0, 1, 0}, scale); + auto const expected_nulls = fp_wrapper({1, 3, 6, 0, 10, 0}, {1, 1, 1, 0, 1, 0}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -526,8 +531,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointPreScanSum) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{0, 1, 3, 6}, scale}; auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({0, 1, 2, 3, 0, 4}, {0, 1, 1, 1, 0, 1}, scale); + auto const expected_nulls = fp_wrapper({0, 0, 1, 3, 0, 6}, {0, 1, 1, 1, 0, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -556,8 +566,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMin) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{1, 1, 1, 1}, scale}; auto const result = cudf::scan(column, cudf::make_min_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({1, 0, 2, 0, 3, 4}, {1, 0, 1, 0, 1, 1}, scale); + auto const expected_nulls = fp_wrapper({1, 0, 1, 0, 1, 1}, {1, 0, 1, 0, 1, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_min_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -572,7 +587,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMax) auto const scale = scale_type{i}; auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const result = cudf::scan(column, cudf::make_max_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), column); + + auto const with_nulls = fp_wrapper({1, 0, 0, 2, 3, 4}, {1, 0, 0, 1, 1, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_max_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), with_nulls); } }