From 56150d9468862bce6c02be290f87d8d992986ceb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 23 May 2023 17:50:39 -0700 Subject: [PATCH] Remove UNKNOWN_NULL_COUNT (#13372) This is the final PR for removing `UNKNOWN_NULL_COUNT` and the implicit kernel launch in the `null_count` methods of `column` and `column_view`. Depends on #13355 and #13341. Closes #11968 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - MithunR (https://github.com/mythrocks) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/13372 --- cpp/benchmarks/common/generate_input.cu | 2 +- cpp/include/cudf/column/column.hpp | 2 +- cpp/include/cudf/column/column_view.hpp | 21 ++---------- cpp/include/cudf/detail/copy_range.cuh | 5 --- cpp/include/cudf/types.hpp | 8 ----- cpp/src/column/column.cu | 25 +------------- cpp/src/column/column_view.cpp | 12 +------ cpp/src/groupby/sort/group_std.cu | 38 +++++++++++++++------- cpp/src/transform/compute_column.cu | 3 ++ cpp/tests/copying/utility_tests.cpp | 18 +++------- cpp/tests/groupby/var_tests.cpp | 30 +++++++++++++++++ java/src/main/native/src/row_conversion.cu | 6 ++++ 12 files changed, 75 insertions(+), 95 deletions(-) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index ba8c58574b9..c2901dc61ee 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -442,7 +442,7 @@ std::unique_ptr create_random_column(data_profile const& profile, num_rows, data.release(), profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}, - null_count); + profile.get_null_probability().has_value() ? null_count : 0); } struct valid_or_zero { diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 8356d8144f2..a28bf82962b 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -163,7 +163,7 @@ class column { * * @return The number of null elements */ - [[nodiscard]] size_type null_count() const; + [[nodiscard]] size_type null_count() const { return _null_count; } /** * @brief Sets the column's null value indicator bitmask to `new_null_mask`. diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index d90c956d053..703131053f9 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -160,14 +160,9 @@ class column_view_base { /** * @brief Returns the count of null elements * - * @note If the column was constructed with `UNKNOWN_NULL_COUNT`, or if at any - * point `set_null_count(UNKNOWN_NULL_COUNT)` was invoked, then the - * first invocation of `null_count()` will compute and store the count of null - * elements indicated by the `null_mask` (if it exists). - * * @return The count of null elements */ - [[nodiscard]] size_type null_count() const; + [[nodiscard]] size_type null_count() const { return _null_count; } /** * @brief Returns the count of null elements in the range [begin, end) @@ -263,10 +258,6 @@ class column_view_base { * * If `null_count()` is zero, `null_mask` is optional. * - * If the null count of the `null_mask` is not specified, it defaults to - * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then - * compute the null count if `null_mask` exists. - * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * @@ -357,10 +348,6 @@ class column_view : public detail::column_view_base { * * If `null_count()` is zero, `null_mask` is optional. * - * If the null count of the `null_mask` is not specified, it defaults to - * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then - * compute the null count if `null_mask` exists. - * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * @@ -509,12 +496,8 @@ class mutable_column_view : public detail::column_view_base { /** * @brief Construct a `mutable_column_view` from pointers to device memory for - *the elements and bitmask of the column. + * the elements and bitmask of the column. - * If the null count of the `null_mask` is not specified, it defaults to - * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then - * compute the null count. - * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 0d5aa509e08..d8773602252 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -154,11 +154,6 @@ void copy_range(SourceValueIterator source_value_begin, auto grid = cudf::detail::grid_1d{num_items, block_size, 1}; if (target.nullable()) { - // TODO: if null_count is UNKNOWN_NULL_COUNT, no need to update null - // count (if null_count is UNKNOWN_NULL_COUNT, invoking null_count() - // will scan the entire bitmask array, and this can be surprising - // in performance if the copy range is small and the column size is - // large). rmm::device_scalar null_count(target.null_count(), stream); auto kernel = diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 3bc1f9d6da7..cb35a00909f 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -97,14 +97,6 @@ size_type distance(T f, T l) return static_cast(std::distance(f, l)); } -/** - * @brief Indicates an unknown null count. - * - * Use this value when constructing any column-like object to indicate that - * the null count should be computed on the first invocation of `null_count()`. - */ -static constexpr size_type UNKNOWN_NULL_COUNT{-1}; - /** * @brief Indicates the order in which elements should be sorted. */ diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index f0b5719e5b4..1508ded93e8 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -117,38 +117,15 @@ mutable_column_view column::mutable_view() child_views.emplace_back(*c); } - // Store the old null count before resetting it. By accessing the value - // directly instead of calling `this->null_count()`, we can avoid a potential - // invocation of `cudf::detail::null_count()`. This does however mean that - // calling `this->null_count()` on the resulting mutable view could still - // potentially invoke `cudf::detail::null_count()`. - auto current_null_count = _null_count; - - // The elements of a column could be changed through a `mutable_column_view`, therefore the - // existing `null_count` is no longer valid. Reset it to `UNKNOWN_NULL_COUNT` forcing it to be - // recomputed on the next invocation of `this->null_count()`. - set_null_count(cudf::UNKNOWN_NULL_COUNT); - return mutable_column_view{type(), size(), _data.data(), static_cast(_null_mask.data()), - current_null_count, + _null_count, 0, child_views}; } -// If the null count is known, return it. Else, compute and return it -size_type column::null_count() const -{ - CUDF_FUNC_RANGE(); - if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { - _null_count = cudf::detail::null_count( - static_cast(_null_mask.data()), 0, size(), cudf::get_default_stream()); - } - return _null_count; -} - void column::set_null_mask(rmm::device_buffer&& new_null_mask, size_type new_null_count) { if (new_null_count > 0) { diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 3e18b9734f6..6ffe7c4fcbe 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,16 +63,6 @@ column_view_base::column_view_base(data_type type, } } -// If null count is known, returns it. Else, compute and return it -size_type column_view_base::null_count() const -{ - if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { - _null_count = cudf::detail::null_count( - null_mask(), offset(), offset() + size(), cudf::get_default_stream()); - } - return _null_count; -} - size_type column_view_base::null_count(size_type begin, size_type end) const { CUDF_EXPECTS((begin >= 0) && (end <= size()) && (begin <= end), "Range is out of bounds."); diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 8cd2d8baf4e..30b6f67dffe 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -131,18 +132,31 @@ struct var_functor { } // set nulls - auto result_view = mutable_column_device_view::create(*result, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - group_sizes.size(), - [d_result = *result_view, d_group_sizes, ddof] __device__(size_type i) { - size_type group_size = d_group_sizes[i]; - if (group_size == 0 or group_size - ddof <= 0) - d_result.set_null(i); - else - d_result.set_valid(i); - }); - + auto result_view = mutable_column_device_view::create(*result, stream); + auto null_count = rmm::device_scalar(0, stream, mr); + auto d_null_count = null_count.data(); + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + group_sizes.size(), + [d_result = *result_view, d_group_sizes, ddof, d_null_count] __device__(size_type i) { + size_type group_size = d_group_sizes[i]; + if (group_size == 0 or group_size - ddof <= 0) { + d_result.set_null(i); + // Assuming that typical data does not have too many nulls this + // atomic shouldn't serialize the code too much. The alternatives + // would be 1) writing a more complex kernel using cub/shmem to + // increase parallelism, or 2) calling `cudf::count_nulls` after the + // fact. (1) is more work than it's worth without benchmarking, and + // this approach should outperform (2) unless large amounts of the + // data is null. + atomicAdd(d_null_count, 1); + } else { + d_result.set_valid(i); + } + }); + + result->set_null_count(null_count.value(stream)); return result; } diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index 8fa5e75664f..61293d51ba2 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,8 @@ std::unique_ptr compute_column(table_view const& table, *table_device, device_expression_data, *mutable_output_device); } CUDF_CHECK_CUDA(stream.value()); + output_column->set_null_count( + cudf::detail::null_count(mutable_output_device->null_mask(), 0, output_column->size(), stream)); return output_column; } diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index a9c3c5311f8..2f552b23b63 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -194,15 +194,7 @@ TYPED_TEST(AllocateLikeTest, ColumnNumericTestSameSize) input = make_numeric_column( cudf::data_type{cudf::type_to_id()}, size, cudf::mask_state::ALL_VALID); got = cudf::allocate_like(input->view()); - EXPECT_EQ(input->type(), got->type()); - EXPECT_EQ(input->size(), got->size()); - EXPECT_EQ(input->nullable(), got->nullable()); - EXPECT_EQ(input->num_children(), got->num_children()); - // CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL includes checking the null-count property. - // This value will be incorrect since the null mask will contain uninitialized bits - // and the null-count set to UNKNOWN_NULL_COUNT on return from allocate_like(). - // This means any subsequent call to null_count() will try to compute the null-count - // using the uninitialized null-mask. + CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(*input, *got); } TYPED_TEST(AllocateLikeTest, ColumnNumericTestSpecifiedSize) @@ -221,15 +213,13 @@ TYPED_TEST(AllocateLikeTest, ColumnNumericTestSpecifiedSize) input = make_numeric_column( cudf::data_type{cudf::type_to_id()}, size, cudf::mask_state::ALL_VALID); got = cudf::allocate_like(input->view(), specified_size); + // Can't use CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL because the sizes of + // the two columns are different. EXPECT_EQ(input->type(), got->type()); EXPECT_EQ(specified_size, got->size()); + EXPECT_EQ(0, got->null_count()); EXPECT_EQ(input->nullable(), got->nullable()); EXPECT_EQ(input->num_children(), got->num_children()); - // CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL includes checking the null-count property. - // This value will be incorrect since the null mask will contain uninitialized bits - // and the null-count set to UNKNOWN_NULL_COUNT on return from allocate_like(). - // This means any subsequent call to null_count() will try to compute the null-count - // using the uninitialized null-mask. } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/groupby/var_tests.cpp b/cpp/tests/groupby/var_tests.cpp index 739fc30af6d..baebc45b975 100644 --- a/cpp/tests/groupby/var_tests.cpp +++ b/cpp/tests/groupby/var_tests.cpp @@ -167,3 +167,33 @@ TYPED_TEST(groupby_var_test, dictionary) expect_vals, cudf::make_variance_aggregation()); } + +// This test ensures that the same results are produced by the sort-based and +// hash-based implementations of groupby-variance. +TYPED_TEST(groupby_var_test, sort_vs_hash) +{ + using K = int32_t; + using V = double; + + cudf::test::fixed_width_column_wrapper keys{50, 30, 90, 80}; + cudf::test::fixed_width_column_wrapper vals{380.0, 370.0, 24.0, 26.0}; + + cudf::groupby::groupby gb_obj(cudf::table_view({keys})); + + auto agg1 = cudf::make_variance_aggregation(); + + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg1)); + + auto result1 = gb_obj.aggregate(requests); + + // This agg forces a sort groupby. + auto agg2 = cudf::make_quantile_aggregation({0.25}); + requests[0].aggregations.push_back(std::move(agg2)); + + auto result2 = gb_obj.aggregate(requests); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result1.second[0].results[0], *result2.second[0].results[0]); +} diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index f1a55fd52c0..a0dbfb3b38c 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -2269,6 +2269,9 @@ std::unique_ptr convert_from_rows(lists_column_view const &input, } } + for (auto &col : output_columns) { + col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size())); + } return std::make_unique
(std::move(output_columns)); } @@ -2324,6 +2327,9 @@ std::unique_ptr
convert_from_rows_fixed_width_optimized( num_rows, num_columns, size_per_row, dev_column_start.data(), dev_column_size.data(), dev_output_data.data(), dev_output_nm.data(), child.data()); + for (auto &col : output_columns) { + col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size())); + } return std::make_unique
(std::move(output_columns)); } else { CUDF_FAIL("Only fixed width types are currently supported");