diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index c2901dc61ee..ba8c58574b9 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -442,7 +442,7 @@ std::unique_ptr create_random_column(data_profile const& profile, num_rows, data.release(), profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}, - profile.get_null_probability().has_value() ? null_count : 0); + null_count); } struct valid_or_zero { diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index a38186458c4..ae448c56447 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -164,7 +164,7 @@ class column { * * @return The number of null elements */ - [[nodiscard]] size_type null_count() const { return _null_count; } + [[nodiscard]] size_type null_count() const; /** * @brief Sets the column's null value indicator bitmask to `new_null_mask`. diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index d80c720a255..f8ff1028a4f 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -160,9 +160,14 @@ class column_view_base { /** * @brief Returns the count of null elements * + * @note If the column was constructed with `UNKNOWN_NULL_COUNT`, or if at any + * point `set_null_count(UNKNOWN_NULL_COUNT)` was invoked, then the + * first invocation of `null_count()` will compute and store the count of null + * elements indicated by the `null_mask` (if it exists). + * * @return The count of null elements */ - [[nodiscard]] size_type null_count() const { return _null_count; } + [[nodiscard]] size_type null_count() const; /** * @brief Returns the count of null elements in the range [begin, end) @@ -258,6 +263,10 @@ class column_view_base { * * If `null_count()` is zero, `null_mask` is optional. * + * If the null count of the `null_mask` is not specified, it defaults to + * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then + * compute the null count if `null_mask` exists. + * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * @@ -348,6 +357,10 @@ class column_view : public detail::column_view_base { * * If `null_count()` is zero, `null_mask` is optional. * + * If the null count of the `null_mask` is not specified, it defaults to + * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then + * compute the null count if `null_mask` exists. + * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * @@ -497,8 +510,12 @@ class mutable_column_view : public detail::column_view_base { /** * @brief Construct a `mutable_column_view` from pointers to device memory for - * the elements and bitmask of the column. + *the elements and bitmask of the column. + * If the null count of the `null_mask` is not specified, it defaults to + * `UNKNOWN_NULL_COUNT`. The first invocation of `null_count()` will then + * compute the null count. + * * If `type` is `EMPTY`, the specified `null_count` will be ignored and * `null_count()` will always return the same value as `size()` * diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index d8773602252..0d5aa509e08 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -154,6 +154,11 @@ void copy_range(SourceValueIterator source_value_begin, auto grid = cudf::detail::grid_1d{num_items, block_size, 1}; if (target.nullable()) { + // TODO: if null_count is UNKNOWN_NULL_COUNT, no need to update null + // count (if null_count is UNKNOWN_NULL_COUNT, invoking null_count() + // will scan the entire bitmask array, and this can be surprising + // in performance if the copy range is small and the column size is + // large). rmm::device_scalar null_count(target.null_count(), stream); auto kernel = diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index cb35a00909f..3bc1f9d6da7 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -97,6 +97,14 @@ size_type distance(T f, T l) return static_cast(std::distance(f, l)); } +/** + * @brief Indicates an unknown null count. + * + * Use this value when constructing any column-like object to indicate that + * the null count should be computed on the first invocation of `null_count()`. + */ +static constexpr size_type UNKNOWN_NULL_COUNT{-1}; + /** * @brief Indicates the order in which elements should be sorted. */ diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 1508ded93e8..f0b5719e5b4 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -117,15 +117,38 @@ mutable_column_view column::mutable_view() child_views.emplace_back(*c); } + // Store the old null count before resetting it. By accessing the value + // directly instead of calling `this->null_count()`, we can avoid a potential + // invocation of `cudf::detail::null_count()`. This does however mean that + // calling `this->null_count()` on the resulting mutable view could still + // potentially invoke `cudf::detail::null_count()`. + auto current_null_count = _null_count; + + // The elements of a column could be changed through a `mutable_column_view`, therefore the + // existing `null_count` is no longer valid. Reset it to `UNKNOWN_NULL_COUNT` forcing it to be + // recomputed on the next invocation of `this->null_count()`. + set_null_count(cudf::UNKNOWN_NULL_COUNT); + return mutable_column_view{type(), size(), _data.data(), static_cast(_null_mask.data()), - _null_count, + current_null_count, 0, child_views}; } +// If the null count is known, return it. Else, compute and return it +size_type column::null_count() const +{ + CUDF_FUNC_RANGE(); + if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { + _null_count = cudf::detail::null_count( + static_cast(_null_mask.data()), 0, size(), cudf::get_default_stream()); + } + return _null_count; +} + void column::set_null_mask(rmm::device_buffer&& new_null_mask, size_type new_null_count) { if (new_null_count > 0) { diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 6ffe7c4fcbe..3e18b9734f6 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,6 +63,16 @@ column_view_base::column_view_base(data_type type, } } +// If null count is known, returns it. Else, compute and return it +size_type column_view_base::null_count() const +{ + if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { + _null_count = cudf::detail::null_count( + null_mask(), offset(), offset() + size(), cudf::get_default_stream()); + } + return _null_count; +} + size_type column_view_base::null_count(size_type begin, size_type end) const { CUDF_EXPECTS((begin >= 0) && (end <= size()) && (begin <= end), "Range is out of bounds."); diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 30b6f67dffe..8cd2d8baf4e 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -26,7 +26,6 @@ #include #include -#include #include #include @@ -132,31 +131,18 @@ struct var_functor { } // set nulls - auto result_view = mutable_column_device_view::create(*result, stream); - auto null_count = rmm::device_scalar(0, stream, mr); - auto d_null_count = null_count.data(); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - group_sizes.size(), - [d_result = *result_view, d_group_sizes, ddof, d_null_count] __device__(size_type i) { - size_type group_size = d_group_sizes[i]; - if (group_size == 0 or group_size - ddof <= 0) { - d_result.set_null(i); - // Assuming that typical data does not have too many nulls this - // atomic shouldn't serialize the code too much. The alternatives - // would be 1) writing a more complex kernel using cub/shmem to - // increase parallelism, or 2) calling `cudf::count_nulls` after the - // fact. (1) is more work than it's worth without benchmarking, and - // this approach should outperform (2) unless large amounts of the - // data is null. - atomicAdd(d_null_count, 1); - } else { - d_result.set_valid(i); - } - }); - - result->set_null_count(null_count.value(stream)); + auto result_view = mutable_column_device_view::create(*result, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + group_sizes.size(), + [d_result = *result_view, d_group_sizes, ddof] __device__(size_type i) { + size_type group_size = d_group_sizes[i]; + if (group_size == 0 or group_size - ddof <= 0) + d_result.set_null(i); + else + d_result.set_valid(i); + }); + return result; } diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index 61293d51ba2..8fa5e75664f 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -129,8 +128,6 @@ std::unique_ptr compute_column(table_view const& table, *table_device, device_expression_data, *mutable_output_device); } CUDF_CHECK_CUDA(stream.value()); - output_column->set_null_count( - cudf::detail::null_count(mutable_output_device->null_mask(), 0, output_column->size(), stream)); return output_column; } diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index 2f552b23b63..a9c3c5311f8 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -194,7 +194,15 @@ TYPED_TEST(AllocateLikeTest, ColumnNumericTestSameSize) input = make_numeric_column( cudf::data_type{cudf::type_to_id()}, size, cudf::mask_state::ALL_VALID); got = cudf::allocate_like(input->view()); - CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(*input, *got); + EXPECT_EQ(input->type(), got->type()); + EXPECT_EQ(input->size(), got->size()); + EXPECT_EQ(input->nullable(), got->nullable()); + EXPECT_EQ(input->num_children(), got->num_children()); + // CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL includes checking the null-count property. + // This value will be incorrect since the null mask will contain uninitialized bits + // and the null-count set to UNKNOWN_NULL_COUNT on return from allocate_like(). + // This means any subsequent call to null_count() will try to compute the null-count + // using the uninitialized null-mask. } TYPED_TEST(AllocateLikeTest, ColumnNumericTestSpecifiedSize) @@ -213,13 +221,15 @@ TYPED_TEST(AllocateLikeTest, ColumnNumericTestSpecifiedSize) input = make_numeric_column( cudf::data_type{cudf::type_to_id()}, size, cudf::mask_state::ALL_VALID); got = cudf::allocate_like(input->view(), specified_size); - // Can't use CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL because the sizes of - // the two columns are different. EXPECT_EQ(input->type(), got->type()); EXPECT_EQ(specified_size, got->size()); - EXPECT_EQ(0, got->null_count()); EXPECT_EQ(input->nullable(), got->nullable()); EXPECT_EQ(input->num_children(), got->num_children()); + // CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL includes checking the null-count property. + // This value will be incorrect since the null mask will contain uninitialized bits + // and the null-count set to UNKNOWN_NULL_COUNT on return from allocate_like(). + // This means any subsequent call to null_count() will try to compute the null-count + // using the uninitialized null-mask. } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/groupby/var_tests.cpp b/cpp/tests/groupby/var_tests.cpp index baebc45b975..739fc30af6d 100644 --- a/cpp/tests/groupby/var_tests.cpp +++ b/cpp/tests/groupby/var_tests.cpp @@ -167,33 +167,3 @@ TYPED_TEST(groupby_var_test, dictionary) expect_vals, cudf::make_variance_aggregation()); } - -// This test ensures that the same results are produced by the sort-based and -// hash-based implementations of groupby-variance. -TYPED_TEST(groupby_var_test, sort_vs_hash) -{ - using K = int32_t; - using V = double; - - cudf::test::fixed_width_column_wrapper keys{50, 30, 90, 80}; - cudf::test::fixed_width_column_wrapper vals{380.0, 370.0, 24.0, 26.0}; - - cudf::groupby::groupby gb_obj(cudf::table_view({keys})); - - auto agg1 = cudf::make_variance_aggregation(); - - std::vector requests; - requests.emplace_back(); - requests[0].values = vals; - requests[0].aggregations.push_back(std::move(agg1)); - - auto result1 = gb_obj.aggregate(requests); - - // This agg forces a sort groupby. - auto agg2 = cudf::make_quantile_aggregation({0.25}); - requests[0].aggregations.push_back(std::move(agg2)); - - auto result2 = gb_obj.aggregate(requests); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result1.second[0].results[0], *result2.second[0].results[0]); -} diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index a0dbfb3b38c..f1a55fd52c0 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -2269,9 +2269,6 @@ std::unique_ptr convert_from_rows(lists_column_view const &input, } } - for (auto &col : output_columns) { - col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size())); - } return std::make_unique
(std::move(output_columns)); } @@ -2327,9 +2324,6 @@ std::unique_ptr
convert_from_rows_fixed_width_optimized( num_rows, num_columns, size_per_row, dev_column_start.data(), dev_column_size.data(), dev_output_data.data(), dev_output_nm.data(), child.data()); - for (auto &col : output_columns) { - col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size())); - } return std::make_unique
(std::move(output_columns)); } else { CUDF_FAIL("Only fixed width types are currently supported");