From 4481142196dde4f5e0f959749664206c23d8a8bc Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 14 Apr 2023 17:38:36 -0400 Subject: [PATCH] Change cudf::test::make_null_mask to also return null-count (#13081) Change `cudf::test::make_null_mask` to return both the null-mask and the null-count. Callers can then use this null-count instead of `UNKNOWN_NULL_COUNT`. These changes include removing `UNKNOWN_NULL_COUNT` usage from the libcudf C++ test source code. One side effect was discovered: a strings column with all nulls can technically have no children, but using `UNKNOWN_NULL_COUNT` allowed the check for this to be bypassed. Therefore many utilities started to fail when `UNKNOWN_NULL_COUNT` was removed. The factory was modified to remove the check, which results in an offsets column and an empty chars column as children. More code will likely need to be changed when `UNKNOWN_NULL_COUNT` is no longer used as a default parameter for factories and other column functions. No behavior is changed. Since `cudf::test::make_null_mask` is technically a public API, this PR could be marked as a breaking change as well. 
Contributes to: https://github.com/rapidsai/cudf/issues/11968 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - MithunR (https://github.com/mythrocks) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/13081 --- .../detail/strings_column_factories.cuh | 1 - cpp/include/cudf_test/column_utilities.hpp | 40 +++--- cpp/include/cudf_test/column_wrapper.hpp | 107 +++++++++------- cpp/tests/bitmask/bitmask_tests.cpp | 34 ++--- cpp/tests/bitmask/valid_if_tests.cu | 9 +- cpp/tests/column/column_test.cu | 12 +- cpp/tests/column/column_view_shallow_test.cpp | 12 +- cpp/tests/column/factories_test.cpp | 6 +- cpp/tests/copying/concatenate_tests.cu | 9 +- .../copying/copy_if_else_nested_tests.cpp | 32 ++--- cpp/tests/copying/pack_tests.cpp | 10 +- .../copying/purge_nonempty_nulls_tests.cpp | 18 +-- cpp/tests/copying/scatter_list_tests.cpp | 55 ++++---- cpp/tests/copying/split_tests.cpp | 33 +++-- cpp/tests/groupby/rank_scan_tests.cpp | 5 +- cpp/tests/hashing/hash_test.cpp | 48 +++---- cpp/tests/interop/from_arrow_test.cpp | 6 +- cpp/tests/interop/to_arrow_test.cpp | 20 +-- cpp/tests/io/json_test.cpp | 10 +- cpp/tests/io/json_type_cast_test.cu | 8 +- cpp/tests/io/orc_test.cpp | 11 +- cpp/tests/io/parquet_chunked_reader_test.cpp | 6 +- cpp/tests/io/parquet_test.cpp | 29 ++--- .../lists/combine/concatenate_rows_tests.cpp | 64 ++++------ cpp/tests/lists/contains_tests.cpp | 99 ++++++++------- cpp/tests/lists/explode_tests.cpp | 32 ++--- .../apply_boolean_mask_tests.cpp | 41 +++--- .../quantiles/percentile_approx_test.cpp | 32 ++--- cpp/tests/reductions/list_rank_test.cpp | 31 ++--- cpp/tests/reductions/rank_tests.cpp | 7 +- cpp/tests/reshape/byte_cast_tests.cpp | 41 +++--- cpp/tests/rolling/collect_ops_test.cpp | 118 ++++++++++-------- cpp/tests/sort/sort_test.cpp | 5 +- .../stream_compaction/distinct_tests.cpp | 58 +++++---- cpp/tests/stream_compaction/unique_tests.cpp | 40 +++--- cpp/tests/structs/structs_column_tests.cpp | 
32 +++-- cpp/tests/transform/bools_to_mask_test.cpp | 10 +- cpp/tests/transform/row_bit_count_test.cu | 8 +- .../column_utilities_tests.cpp | 65 +++++----- .../lists_column_wrapper_tests.cpp | 40 +++--- 40 files changed, 626 insertions(+), 618 deletions(-) diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index a3a5946fe55..b219b28cf9b 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -170,7 +170,6 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, size_type bytes = std::distance(chars_begin, chars_end) * sizeof(char); if (strings_count == 0) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(null_count < strings_count, "null strings column not yet supported"); CUDF_EXPECTS(bytes >= 0, "invalid offsets data"); // build offsets column -- this is the number of strings + 1 diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 4fea8f84c78..f288c30e313 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -254,26 +254,26 @@ std::pair, std::vector> to_host(column_view template <> inline std::pair, std::vector> to_host(column_view c) { - auto const scv = strings_column_view(c); - auto const h_chars = cudf::detail::make_std_vector_sync( - cudf::device_span(scv.chars().data(), scv.chars().size()), - cudf::get_default_stream()); - auto const h_offsets = cudf::detail::make_std_vector_sync( - cudf::device_span( - scv.offsets().data() + scv.offset(), scv.size() + 1), - cudf::get_default_stream()); - - // build std::string vector from chars and offsets - std::vector host_data; - host_data.reserve(c.size()); - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - std::back_inserter(host_data), - [&](auto start, auto end) { return 
std::string(h_chars.data() + start, end - start); }); - - return {host_data, bitmask_to_host(c)}; + thrust::host_vector host_data(c.size()); + if (c.size() > c.null_count()) { + auto const scv = strings_column_view(c); + auto const h_chars = cudf::detail::make_std_vector_sync( + cudf::device_span(scv.chars().data(), scv.chars().size()), + cudf::get_default_stream()); + auto const h_offsets = cudf::detail::make_std_vector_sync( + cudf::device_span( + scv.offsets().data() + scv.offset(), scv.size() + 1), + cudf::get_default_stream()); + + // build std::string vector from chars and offsets + std::transform( + std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + host_data.begin(), + [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); + } + return {std::move(host_data), bitmask_to_host(c)}; } } // namespace cudf::test diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 6341e2e10b0..87fbfdc0dad 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -240,16 +240,23 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) * element in `[begin,end)` that evaluated to `true`. 
*/ template -std::vector make_null_mask_vector(ValidityIterator begin, ValidityIterator end) +std::pair, cudf::size_type> make_null_mask_vector(ValidityIterator begin, + ValidityIterator end) { auto const size = cudf::distance(begin, end); auto const num_words = cudf::bitmask_allocation_size_bytes(size) / sizeof(bitmask_type); - auto null_mask = std::vector(num_words, 0); - for (auto i = 0; i < size; ++i) - if (*(begin + i)) set_bit_unsafe(null_mask.data(), i); + auto null_mask = std::vector(num_words, 0); + auto null_count = cudf::size_type{0}; + for (auto i = 0; i < size; ++i) { + if (*(begin + i)) { + set_bit_unsafe(null_mask.data(), i); + } else { + ++null_count; + } + } - return null_mask; + return {std::move(null_mask), null_count}; } /** @@ -266,12 +273,14 @@ std::vector make_null_mask_vector(ValidityIterator begin, Validity * element in `[begin,end)` that evaluated to `true`. */ template -rmm::device_buffer make_null_mask(ValidityIterator begin, ValidityIterator end) +std::pair make_null_mask(ValidityIterator begin, + ValidityIterator end) { - auto null_mask = make_null_mask_vector(begin, end); - return rmm::device_buffer{null_mask.data(), - null_mask.size() * sizeof(decltype(null_mask.front())), - cudf::get_default_stream()}; + auto [null_mask, null_count] = make_null_mask_vector(begin, end); + auto d_mask = rmm::device_buffer{null_mask.data(), + cudf::bitmask_allocation_size_bytes(cudf::distance(begin, end)), + cudf::get_default_stream()}; + return {std::move(d_mask), null_count}; } /** @@ -319,10 +328,12 @@ class fixed_width_column_wrapper : public detail::column_wrapper { fixed_width_column_wrapper() : column_wrapper{} { std::vector empty; - wrapped.reset(new cudf::column{ - cudf::data_type{cudf::type_to_id()}, - 0, - detail::make_elements(empty.begin(), empty.end())}); + wrapped.reset( + new cudf::column{cudf::data_type{cudf::type_to_id()}, + 0, + detail::make_elements(empty.begin(), empty.end()), + rmm::device_buffer{}, + 0}); } /** @@ -349,7 +360,9 
@@ class fixed_width_column_wrapper : public detail::column_wrapper { auto const size = cudf::distance(begin, end); wrapped.reset(new cudf::column{cudf::data_type{cudf::type_to_id()}, size, - detail::make_elements(begin, end)}); + detail::make_elements(begin, end), + rmm::device_buffer{}, + 0}); } /** @@ -379,12 +392,13 @@ class fixed_width_column_wrapper : public detail::column_wrapper { fixed_width_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v) : column_wrapper{} { - auto const size = cudf::distance(begin, end); + auto const size = cudf::distance(begin, end); + auto [null_mask, null_count] = detail::make_null_mask(v, v + size); wrapped.reset(new cudf::column{cudf::data_type{cudf::type_to_id()}, size, detail::make_elements(begin, end), - detail::make_null_mask(v, v + size), - cudf::UNKNOWN_NULL_COUNT}); + std::move(null_mask), + null_count}); } /** @@ -547,7 +561,9 @@ class fixed_point_column_wrapper : public detail::column_wrapper { wrapped.reset(new cudf::column{ data_type, size, - rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()}}); + rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()}, + rmm::device_buffer{}, + 0}); } /** @@ -603,17 +619,17 @@ class fixed_point_column_wrapper : public detail::column_wrapper { { CUDF_EXPECTS(numeric::is_supported_representation_type(), "not valid representation type"); - auto const size = cudf::distance(begin, end); - auto const elements = thrust::host_vector(begin, end); - auto const id = type_to_id>(); - auto const data_type = cudf::data_type{id, static_cast(scale)}; - + auto const size = cudf::distance(begin, end); + auto const elements = thrust::host_vector(begin, end); + auto const id = type_to_id>(); + auto const data_type = cudf::data_type{id, static_cast(scale)}; + auto [null_mask, null_count] = detail::make_null_mask(v, v + size); wrapped.reset(new cudf::column{ data_type, size, rmm::device_buffer{elements.data(), size * 
sizeof(Rep), cudf::get_default_stream()}, - detail::make_null_mask(v, v + size), - cudf::UNKNOWN_NULL_COUNT}); + std::move(null_mask), + null_count}); } /** @@ -736,7 +752,7 @@ class strings_column_wrapper : public detail::column_wrapper { chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_offsets = cudf::detail::make_device_uvector_sync( offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - wrapped = cudf::make_strings_column(d_chars, d_offsets); + wrapped = cudf::make_strings_column(d_chars, d_offsets, {}, 0); } /** @@ -771,16 +787,16 @@ class strings_column_wrapper : public detail::column_wrapper { strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v) : column_wrapper{} { - size_type num_strings = std::distance(begin, end); - auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v); - auto null_mask = detail::make_null_mask_vector(v, v + num_strings); - auto d_chars = cudf::detail::make_device_uvector_sync( + size_type num_strings = std::distance(begin, end); + auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v); + auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings); + auto d_chars = cudf::detail::make_device_uvector_sync( chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_offsets = cudf::detail::make_device_uvector_sync( offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_bitmask = cudf::detail::make_device_uvector_sync( null_mask, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask); + wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask, null_count); } /** @@ -1579,14 +1595,14 @@ class lists_column_wrapper : public detail::column_wrapper { // increment depth depth = expected_depth + 1; + auto [null_mask, null_count] = [&] { + if (v.size() <= 0) return 
std::make_pair(rmm::device_buffer{}, cudf::size_type{0}); + return cudf::test::detail::make_null_mask(v.begin(), v.end()); + }(); + // construct the list column - wrapped = - make_lists_column(cols.size(), - std::move(offsets), - std::move(data), - v.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, - v.size() <= 0 ? rmm::device_buffer{} - : cudf::test::detail::make_null_mask(v.begin(), v.end())); + wrapped = make_lists_column( + cols.size(), std::move(offsets), std::move(data), null_count, std::move(null_mask)); } /** @@ -1668,7 +1684,7 @@ class lists_column_wrapper : public detail::column_wrapper { std::make_unique(lcv.offsets()), normalize_column(lists_column_view(col).child(), lists_column_view(expected_hierarchy).child()), - UNKNOWN_NULL_COUNT, + col.null_count(), copy_bitmask(col)); } @@ -1843,12 +1859,13 @@ class structs_column_wrapper : public detail::column_wrapper { CUDF_EXPECTS(validity.size() <= 0 || static_cast(validity.size()) == num_rows, "Validity buffer must have as many elements as rows in the struct column."); + auto [null_mask, null_count] = [&] { + if (validity.size() <= 0) return std::make_pair(rmm::device_buffer{}, cudf::size_type{0}); + return cudf::test::detail::make_null_mask(validity.begin(), validity.end()); + }(); + wrapped = cudf::make_structs_column( - num_rows, - std::move(child_columns), - validity.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, - validity.size() <= 0 ? 
rmm::device_buffer{} - : detail::make_null_mask(validity.begin(), validity.end())); + num_rows, std::move(child_columns), null_count, std::move(null_mask)); } template diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 7805828ad55..2a31fb7940b 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -36,7 +36,6 @@ struct BitmaskUtilitiesTest : public cudf::test::BaseFixture { TEST_F(BitmaskUtilitiesTest, StateNullCount) { EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::UNALLOCATED, 42)); - EXPECT_EQ(cudf::UNKNOWN_NULL_COUNT, cudf::state_null_count(cudf::mask_state::UNINITIALIZED, 42)); EXPECT_EQ(42, cudf::state_null_count(cudf::mask_state::ALL_NULL, 42)); EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::ALL_VALID, 42)); } @@ -575,12 +574,13 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) for (auto& m : validity_bit) { m = this->generate(); } - auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); + auto input_mask = + std::get<0>(cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end())); int begin_bit = 0; int end_bit = 800; - auto gold_splice_mask = cudf::test::detail::make_null_mask(validity_bit.begin() + begin_bit, - validity_bit.begin() + end_bit); + auto gold_splice_mask = std::get<0>(cudf::test::detail::make_null_mask( + validity_bit.begin() + begin_bit, validity_bit.begin() + end_bit)); auto splice_mask = cudf::copy_bitmask( static_cast(input_mask.data()), begin_bit, end_bit); @@ -597,12 +597,13 @@ TEST_F(CopyBitmaskTest, TestNonZeroOffset) for (auto& m : validity_bit) { m = this->generate(); } - auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); + auto input_mask = + std::get<0>(cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end())); int begin_bit = 321; int end_bit = 998; - auto gold_splice_mask = 
cudf::test::detail::make_null_mask(validity_bit.begin() + begin_bit, - validity_bit.begin() + end_bit); + auto gold_splice_mask = std::get<0>(cudf::test::detail::make_null_mask( + validity_bit.begin() + begin_bit, validity_bit.begin() + end_bit)); auto splice_mask = cudf::copy_bitmask( static_cast(input_mask.data()), begin_bit, end_bit); @@ -621,7 +622,8 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) for (auto& m : validity_bit) { m = this->generate(); } - auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); + auto gold_mask = + std::get<0>(cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end())); rmm::device_buffer copy_mask{gold_mask, cudf::get_default_stream()}; cudf::column original{t, @@ -661,18 +663,21 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) for (auto& m : validity_bit) { m = this->generate(); } - auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); + auto gold_mask = + std::get<0>(cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end())); std::vector split{0, 104, 128, 152, 311, 491, 583, 734, 760, num_elements}; std::vector cols; std::vector views; for (unsigned i = 0; i < split.size() - 1; i++) { + auto [null_mask, null_count] = cudf::test::detail::make_null_mask( + validity_bit.begin() + split[i], validity_bit.begin() + split[i + 1]); cols.emplace_back( t, split[i + 1] - split[i], rmm::device_buffer{sizeof(int) * (split[i + 1] - split[i]), cudf::get_default_stream()}, - cudf::test::detail::make_null_mask(validity_bit.begin() + split[i], - validity_bit.begin() + split[i + 1])); + std::move(null_mask), + null_count); views.push_back(cols.back()); } rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); @@ -706,7 +711,8 @@ TEST_F(MergeBitmaskTest, TestBitmaskAnd) auto odd_indices = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto odd = 
cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); + auto odd = + std::get<0>(cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows())); EXPECT_EQ(nullptr, result1_mask.data()); CUDF_TEST_EXPECT_EQUAL_BUFFERS( @@ -735,8 +741,8 @@ TEST_F(MergeBitmaskTest, TestBitmaskOr) auto all_but_index3 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - auto null3 = - cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); + auto null3 = std::get<0>( + cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows())); EXPECT_EQ(nullptr, result1_mask.data()); CUDF_TEST_EXPECT_EQUAL_BUFFERS( diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu index cb086cda179..30b244fd8d1 100644 --- a/cpp/tests/bitmask/valid_if_tests.cu +++ b/cpp/tests/bitmask/valid_if_tests.cu @@ -70,8 +70,9 @@ TEST_F(ValidIfTest, OddsValid) odds_valid{}, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(5000, actual.second); + EXPECT_EQ(expected.second, actual.second); } TEST_F(ValidIfTest, AllValid) @@ -83,8 +84,9 @@ TEST_F(ValidIfTest, AllValid) all_valid{}, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(0, actual.second); + EXPECT_EQ(expected.second, actual.second); } TEST_F(ValidIfTest, AllNull) @@ -96,6 +98,7 @@ TEST_F(ValidIfTest, AllNull) all_null{}, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), 
expected.size()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(10000, actual.second); + EXPECT_EQ(expected.second, actual.second); } diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu index ceff726d8e0..cb9c488a20e 100644 --- a/cpp/tests/column/column_test.cu +++ b/cpp/tests/column/column_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -196,8 +196,6 @@ TYPED_TEST(TypedColumnTest, ResetNullCountAllNull) this->type(), this->num_elements(), std::move(this->data), std::move(this->all_null_mask)}; EXPECT_EQ(this->num_elements(), col.null_count()); - EXPECT_NO_THROW(col.set_null_count(cudf::UNKNOWN_NULL_COUNT)); - EXPECT_EQ(this->num_elements(), col.null_count()); } TYPED_TEST(TypedColumnTest, ResetNullCountAllValid) @@ -205,8 +203,6 @@ TYPED_TEST(TypedColumnTest, ResetNullCountAllValid) cudf::column col{ this->type(), this->num_elements(), std::move(this->data), std::move(this->all_valid_mask)}; EXPECT_EQ(0, col.null_count()); - EXPECT_NO_THROW(col.set_null_count(cudf::UNKNOWN_NULL_COUNT)); - EXPECT_EQ(0, col.null_count()); } TYPED_TEST(TypedColumnTest, CopyDataNoMask) @@ -388,7 +384,7 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask) TYPED_TEST(TypedColumnTest, ConstructWithChildren) { std::vector> children; - ; + children.emplace_back(std::make_unique( cudf::data_type{cudf::type_id::INT8}, 42, @@ -403,7 +399,7 @@ TYPED_TEST(TypedColumnTest, ConstructWithChildren) this->num_elements(), rmm::device_buffer{this->data, cudf::get_default_stream()}, rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}, - cudf::UNKNOWN_NULL_COUNT, + 0, std::move(children)}; verify_column_views(col); @@ -448,7 +444,7 @@ TYPED_TEST(TypedColumnTest, 
ReleaseWithChildren) this->num_elements(), rmm::device_buffer{this->data, cudf::get_default_stream()}, rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}, - cudf::UNKNOWN_NULL_COUNT, + 0, std::move(children)}; auto original_data = col.view().head(); diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp index 8a742b50baa..d7288f926c7 100644 --- a/cpp/tests/column/column_view_shallow_test.cpp +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -175,11 +175,8 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash_update_data) auto col_view_new = cudf::column_view{*col}; EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); } - // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT) + // set_null_count + new column_view = same hash. { - col->set_null_count(cudf::UNKNOWN_NULL_COUNT); - auto col_view_new = cudf::column_view{*col}; - EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); col->set_null_count(col->size()); auto col_view_new2 = cudf::column_view{*col}; EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new2)); @@ -344,11 +341,8 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_update_data) auto col_view_new = cudf::column_view{*col}; EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); } - // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT) + // set_null_count + new column_view = same hash. 
{ - col->set_null_count(cudf::UNKNOWN_NULL_COUNT); - auto col_view_new = cudf::column_view{*col}; - EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); col->set_null_count(col->size()); auto col_view_new2 = cudf::column_view{*col}; EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new2)); diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index eeeb23f4b20..37732f67229 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -777,11 +777,11 @@ TEST_F(ListsZeroLengthColumnTest, SuperimposeNulls) .release(); auto offsets = offset_t{0, 3, 3, 5}.release(); - auto const valid_iter = cudf::test::iterators::null_at(2); - auto null_mask = cudf::test::detail::make_null_mask(valid_iter, valid_iter + 3); + auto const valid_iter = cudf::test::iterators::null_at(2); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valid_iter, valid_iter + 3); return cudf::make_lists_column( - 3, std::move(offsets), std::move(child), 1, std::move(null_mask)); + 3, std::move(offsets), std::move(child), null_count, std::move(null_mask)); }(); auto const expected_child = diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index ca343b963d7..dd84892a959 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -847,11 +847,10 @@ TEST_F(StructsColumnTest, ConcatenateStructs) expected_children.push_back(cudf::concatenate(age_col_vec)); expected_children.push_back(cudf::concatenate(is_human_col_vec)); std::vector struct_validity({1, 0, 1, 1, 1, 0}); - auto expected = make_structs_column( - 6, - std::move(expected_children), - 2, - cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end()); + auto expected = + make_structs_column(6, std::move(expected_children), null_count, 
std::move(null_mask)); // concatenate as structs std::vector src; diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp index 4641d540bb4..f4dfda3e377 100644 --- a/cpp/tests/copying/copy_if_else_nested_tests.cpp +++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -347,24 +347,20 @@ TYPED_TEST(TypedCopyIfElseNestedTest, ListsWithStructs) auto lhs_strings = strings{{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, null_at_4}; auto lhs_structs = structs{{lhs_ints, lhs_strings}}.release(); auto lhs_offsets = offsets{0, 2, 4, 6, 10, 10}.release(); - auto const lhs = - cudf::make_lists_column(5, - std::move(lhs_offsets), - std::move(lhs_structs), - 1, - cudf::test::detail::make_null_mask(null_at_4, null_at_4 + 5)); + + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_at_4, null_at_4 + 5); + auto const lhs = cudf::make_lists_column( + 5, std::move(lhs_offsets), std::move(lhs_structs), null_count, std::move(null_mask)); auto rhs_ints = ints{{0, 11, 22, 33, 44, 55, 66, 77, 88, 99}, null_at_6}; auto rhs_strings = strings{{"00", "11", "22", "33", "44", "55", "66", "77", "88", "99"}, null_at_7}; auto rhs_structs = structs{{rhs_ints, rhs_strings}, null_at_8}; auto rhs_offsets = offsets{0, 0, 4, 6, 8, 10}; - auto const rhs = - cudf::make_lists_column(5, - rhs_offsets.release(), - rhs_structs.release(), - 1, - cudf::test::detail::make_null_mask(null_at_0, null_at_0 + 5)); + + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(null_at_0, null_at_0 + 5); + auto const rhs = cudf::make_lists_column( + 5, rhs_offsets.release(), rhs_structs.release(), null_count, std::move(null_mask)); auto selector_column = bools{1, 0, 1, 0, 
1}.release(); @@ -376,12 +372,10 @@ TYPED_TEST(TypedCopyIfElseNestedTest, ListsWithStructs) strings{{"0", "1", "00", "11", "22", "33", "", "5", "66", ""}, null_at_6_9}; auto expected_structs = structs{{expected_ints, expected_strings}}; auto expected_offsets = offsets{0, 2, 6, 8, 10, 10}; - auto const expected = - cudf::make_lists_column(5, - expected_offsets.release(), - expected_structs.release(), - 1, - cudf::test::detail::make_null_mask(null_at_4, null_at_4 + 5)); + + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(null_at_4, null_at_4 + 5); + auto const expected = cudf::make_lists_column( + 5, expected_offsets.release(), expected_structs.release(), null_count, std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), expected->view()); } diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index 44e3ca2669e..a2ef8f3a3db 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -269,12 +269,10 @@ std::vector> generate_list_of_struct() std::vector list_validity{1, 1, 1, 1, 1, 0, 1, 0, 1}; cudf::test::fixed_width_column_wrapper offsets{0, 1, 4, 5, 7, 7, 10, 13, 14, 16}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(list_validity.begin(), list_validity.begin() + 9); auto list = cudf::make_lists_column( - 9, - offsets.release(), - struct_column.release(), - 2, - cudf::test::detail::make_null_mask(list_validity.begin(), list_validity.begin() + 9)); + 9, offsets.release(), struct_column.release(), null_count, std::move(null_mask)); std::vector> out; out.push_back(std::move(list)); diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp index 76a36f1489d..f0a3fe1b7a8 100644 --- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp +++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp @@ -343,12 +343,12 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings) ); // Construct a list column from the strings column. 
- auto const lists = - cudf::make_lists_column(4, - offsets_col_t{0, 4, 5, 7, 10}.release(), - std::move(strings), - 0, - cudf::test::detail::make_null_mask(no_nulls(), no_nulls() + 4)); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(no_nulls(), no_nulls() + 4); + auto const lists = cudf::make_lists_column(4, + offsets_col_t{0, 4, 5, 7, 10}.release(), + std::move(strings), + null_count, + std::move(null_mask)); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists)); // The child column has non-empty nulls but it has already been sanitized during lists column @@ -405,14 +405,14 @@ TEST_F(PurgeNonEmptyNullsTest, StructOfList) EXPECT_FALSE(cudf::has_nonempty_nulls(child)); return cudf::test::structs_column_wrapper{{child}}.release(); }(); - auto null_mask_buff = [&] { + auto [null_mask, null_count] = [&] { auto const valid_iter = null_at(2); return cudf::test::detail::make_null_mask(valid_iter, valid_iter + structs_input->size()); }(); // Manually set the null mask for the columns, leaving the null at list index 2 unsanitized. - structs_input->child(0).set_null_mask(null_mask_buff); - structs_input->set_null_mask(std::move(null_mask_buff)); + structs_input->child(0).set_null_mask(null_mask, null_count, cudf::get_default_stream()); + structs_input->set_null_mask(std::move(null_mask), null_count); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*structs_input)); EXPECT_TRUE(cudf::has_nonempty_nulls(*structs_input)); diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp index 53f3da7458b..b596166642f 100644 --- a/cpp/tests/copying/scatter_list_tests.cpp +++ b/cpp/tests/copying/scatter_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -172,13 +172,15 @@ TYPED_TEST(TypedScatterListsTest, NullableListsOfNullableFixedWidth) auto src_list_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; }); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(src_list_validity, src_list_validity + 3); // One null list row, and one row with nulls. auto src_list_column = cudf::make_lists_column( 3, cudf::test::fixed_width_column_wrapper{0, 4, 7, 7}.release(), src_child.release(), - 1, - cudf::test::detail::make_null_mask(src_list_validity, src_list_validity + 3)); + null_count, + std::move(null_mask)); auto target_list_column = cudf::test::lists_column_wrapper{ {0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}; @@ -194,12 +196,14 @@ TYPED_TEST(TypedScatterListsTest, NullableListsOfNullableFixedWidth) auto expected_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(expected_validity, expected_validity + 7); auto expected_lists_column = cudf::make_lists_column( 7, cudf::test::fixed_width_column_wrapper{0, 3, 5, 9, 11, 13, 13, 15}.release(), expected_child_ints.release(), - 1, - cudf::test::detail::make_null_mask(expected_validity, expected_validity + 7)); + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists_column->view(), ret->get_column(0)); } @@ -341,12 +345,13 @@ TEST_F(ScatterListsTest, NullableListsOfNullableStrings) auto src_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - auto src_list_column = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(src_validity, src_validity + 3); + auto src_list_column = cudf::make_lists_column( 3, cudf::test::fixed_width_column_wrapper{0, 5, 5, 7}.release(), src_strings_column.release(), - 1, - cudf::test::detail::make_null_mask(src_validity, src_validity + 3)); + null_count, + 
std::move(null_mask)); auto target_list_column = cudf::test::lists_column_wrapper{{"zero"}, {"one", "one"}, @@ -379,12 +384,14 @@ TEST_F(ScatterListsTest, NullableListsOfNullableStrings) auto expected_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(expected_validity, expected_validity + 6); auto expected_lists = cudf::make_lists_column( 6, cudf::test::fixed_width_column_wrapper{0, 2, 4, 9, 11, 11, 13}.release(), expected_strings.release(), - 1, - cudf::test::detail::make_null_mask(expected_validity, expected_validity + 6)); + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), ret->get_column(0)); } @@ -882,13 +889,13 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfStructs) auto source_list_null_mask_begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; }); - auto source_lists = - cudf::make_lists_column(3, - offsets_column{0, 4, 7, 7}.release(), - source_structs.release(), - 1, - cudf::test::detail::make_null_mask(source_list_null_mask_begin, - source_list_null_mask_begin + 3)); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask( + source_list_null_mask_begin, source_list_null_mask_begin + 3); + auto source_lists = cudf::make_lists_column(3, + offsets_column{0, 4, 7, 7}.release(), + source_structs.release(), + null_count, + std::move(null_mask)); // clang-format off auto target_ints = numerics_column{ @@ -951,13 +958,13 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfStructs) auto expected_lists_null_mask_begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - auto expected_lists = - cudf::make_lists_column(6, - offsets_column{0, 3, 5, 9, 11, 11, 13}.release(), - expected_structs.release(), - 1, - cudf::test::detail::make_null_mask(expected_lists_null_mask_begin, - expected_lists_null_mask_begin + 6)); + 
std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask( + expected_lists_null_mask_begin, expected_lists_null_mask_begin + 6); + auto expected_lists = cudf::make_lists_column(6, + offsets_column{0, 3, 5, 9, 11, 11, 13}.release(), + expected_structs.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); } diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index 6324fe2bab5..b411c10ab1d 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -977,15 +977,14 @@ void split_structs_no_children(SplitFunc Split, CompareFunc Compare) // all nulls { std::vector struct_validity{false, false, false, false}; - auto struct_column = cudf::make_structs_column( - 4, {}, 4, cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end()); + auto struct_column = cudf::make_structs_column(4, {}, null_count, std::move(null_mask)); std::vector expected_validity{false, false}; - auto expected = cudf::make_structs_column( - 2, - {}, - 2, - cudf::test::detail::make_null_mask(expected_validity.begin(), expected_validity.end())); + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(expected_validity.begin(), expected_validity.end()); + auto expected = cudf::make_structs_column(2, {}, null_count, std::move(null_mask)); // split std::vector splits{2}; @@ -1014,15 +1013,14 @@ void split_structs_no_children(SplitFunc Split, CompareFunc Compare) // all nulls, empty output column { std::vector struct_validity{false, false, false, false}; - auto struct_column = cudf::make_structs_column( - 4, {}, 4, cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(struct_validity.begin(), 
struct_validity.end()); + auto struct_column = cudf::make_structs_column(4, {}, null_count, std::move(null_mask)); std::vector expected_validity0{false, false, false, false}; - auto expected0 = cudf::make_structs_column( - 4, - {}, - 4, - cudf::test::detail::make_null_mask(expected_validity0.begin(), expected_validity0.end())); + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(expected_validity0.begin(), expected_validity0.end()); + auto expected0 = cudf::make_structs_column(4, {}, null_count, std::move(null_mask)); auto expected1 = cudf::make_structs_column(0, {}, 0, rmm::device_buffer{}); @@ -1332,7 +1330,8 @@ TEST_F(ContiguousSplitUntypedTest, ValidityRepartition) }); cudf::size_type const num_rows = 2000000; auto col = cudf::sequence(num_rows, cudf::numeric_scalar{0}); - col->set_null_mask(cudf::test::detail::make_null_mask(rvalids, rvalids + num_rows)); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(rvalids, rvalids + num_rows); + col->set_null_mask(std::move(null_mask), null_count); cudf::table_view t({*col}); auto result = cudf::contiguous_split(t, {num_rows / 2}); @@ -2034,12 +2033,12 @@ TEST_F(ContiguousSplitNestedTypesTest, ListOfStruct) cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), outer_offsets.end()); std::vector outer_validity{1, 1, 1, 0, 1, 1, 0}; - auto outer_null_mask = + auto [outer_null_mask, null_count] = cudf::test::detail::make_null_mask(outer_validity.begin(), outer_validity.end()); auto outer_list = make_lists_column(static_cast(outer_validity.size()), outer_offsets_col.release(), struct_column.release(), - cudf::UNKNOWN_NULL_COUNT, + null_count, std::move(outer_null_mask)); // split diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 1b1b4fbd371..dbf70ffbc80 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -347,8 +347,9 @@ TYPED_TEST(typed_groupby_rank_scan_test, 
structsWithNullPushdown) auto struct_column = cudf::test::structs_column_wrapper{nums_member, strings_member}.release(); // Reset null-mask, a posteriori. Nulls will not be pushed down to children. auto const null_iter = nulls_at({1, 2, 11}); - struct_column->set_null_mask( - cudf::test::detail::make_null_mask(null_iter, null_iter + num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_iter, null_iter + num_rows); + struct_column->set_null_mask(std::move(null_mask), null_count); return struct_column; }; diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index bb96b50c624..a8546bbe346 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -221,10 +221,10 @@ TEST_F(HashTest, ListOfStruct) 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; auto list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - auto nullmask_buf = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); auto list_column = cudf::make_lists_column( - 17, offsets.release(), struct_col.release(), cudf::UNKNOWN_NULL_COUNT, std::move(nullmask_buf)); + 17, offsets.release(), struct_col.release(), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{83451479, 83451479, @@ -287,21 +287,17 @@ TEST_F(HashTest, ListOfEmptyStruct) // [{}, {}] auto struct_validity = std::vector{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}; - auto struct_validity_buffer = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end()); - auto struct_col = - cudf::make_structs_column(14, {}, cudf::UNKNOWN_NULL_COUNT, std::move(struct_validity_buffer)); + auto struct_col = cudf::make_structs_column(14, {}, null_count, std::move(null_mask)); auto offsets = cudf::test::fixed_width_column_wrapper{ 0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14}; auto list_nullmask = 
std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - auto list_validity_buffer = + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto list_column = cudf::make_lists_column(13, - offsets.release(), - std::move(struct_col), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_column = cudf::make_lists_column( + 13, offsets.release(), std::move(struct_col), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{2271818677u, 2271818677u, @@ -334,13 +330,10 @@ TEST_F(HashTest, EmptyDeepList) auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0}; auto list_nullmask = std::vector{1, 1, 0, 0}; - auto list_validity_buffer = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto list_column = cudf::make_lists_column(4, - offsets.release(), - list1.release(), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_column = cudf::make_lists_column( + 4, offsets.release(), list1.release(), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{ 2271818677u, 2271818677u, 2271818614u, 2271818614u}; @@ -717,13 +710,11 @@ TEST_F(SparkMurmurHash3Test, ListValues) cudf::test::iterators::nulls_at({0, 14})); auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 13, 16}; - auto list_validity = cudf::test::iterators::nulls_at({0}); - auto list_validity_buffer = cudf::test::detail::make_null_mask(list_validity, list_validity + 11); - auto list_column = cudf::make_lists_column(11, - offsets.release(), - nested_list.release(), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_validity = cudf::test::iterators::nulls_at({0}); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(list_validity, list_validity + 11); + auto list_column = cudf::make_lists_column( + 11, 
offsets.release(), nested_list.release(), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{42, 42, @@ -836,13 +827,10 @@ TEST_F(SparkMurmurHash3Test, ListOfStructValues) auto offsets = cudf::test::fixed_width_column_wrapper{0, 1, 2, 3, 4, 5, 7, 9, 11}; auto list_nullmask = std::vector(1, 8); - auto list_validity_buffer = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto list_column = cudf::make_lists_column(8, - offsets.release(), - struct_column.release(), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_column = cudf::make_lists_column( + 8, offsets.release(), struct_column.release(), null_count, std::move(null_mask)); // TODO: Lists of structs are not yet supported. Once support is added, // remove this EXPECT_THROW and uncomment the rest of this test. diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 3f4d5bcf20f..291c114e7ab 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -186,10 +186,10 @@ TEST_F(FromArrowTest, StructColumn) vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); - auto mask = + auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper{{true, true, false}}); - auto sub_struct_col = cudf::make_structs_column( - num_rows, std::move(cols2), cudf::UNKNOWN_NULL_COUNT, std::move(*(mask.first))); + auto sub_struct_col = + cudf::make_structs_column(num_rows, std::move(cols2), null_count, std::move(*null_mask)); vector_of_columns cols; cols.push_back(std::move(str_col)); cols.push_back(std::move(int_col)); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 4b481ade83f..a32447beda0 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,13 +93,13 @@ std::pair, std::shared_ptr> get_table list_int64_data.begin(), list_int64_data.end(), list_int64_data_validity.begin()); auto list_offsets_column = cudf::test::fixed_width_column_wrapper(list_offsets.begin(), list_offsets.end()); - auto list_mask = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( + auto [list_mask, list_nulls] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( bool_data_validity.begin(), bool_data_validity.end())); columns.emplace_back(cudf::make_lists_column(length, list_offsets_column.release(), list_child_column.release(), - cudf::UNKNOWN_NULL_COUNT, - std::move(*(list_mask.first)))); + list_nulls, + std::move(*list_mask))); auto int_column = cudf::test::fixed_width_column_wrapper( int64_data.begin(), int64_data.end(), validity.begin()) .release(); @@ -109,10 +109,10 @@ std::pair, std::shared_ptr> get_table vector_of_columns cols; cols.push_back(move(int_column)); cols.push_back(move(str_column)); - auto mask = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( + auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( bool_data_validity.begin(), bool_data_validity.end())); - columns.emplace_back(cudf::make_structs_column( - length, std::move(cols), cudf::UNKNOWN_NULL_COUNT, std::move(*(mask.first)))); + columns.emplace_back( + cudf::make_structs_column(length, std::move(cols), null_count, std::move(*null_mask))); auto int64array = get_arrow_array(int64_data, validity); @@ -287,10 +287,10 @@ TEST_F(ToArrowTest, StructColumn) vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); - auto mask = + auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper{{true, true, false}}); 
- auto sub_struct_col = cudf::make_structs_column( - num_rows, std::move(cols2), cudf::UNKNOWN_NULL_COUNT, std::move(*(mask.first))); + auto sub_struct_col = + cudf::make_structs_column(num_rows, std::move(cols2), null_count, std::move(*null_mask)); vector_of_columns cols; cols.push_back(std::move(str_col)); cols.push_back(std::move(int_col)); diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 3a4e9a3773a..748f5853dff 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -936,7 +936,7 @@ TEST_F(JsonReaderTest, ArrowFileSource) .dtypes({dtype()}) .lines(true) .legacy(true); // Support in new reader coming in https://github.com/rapidsai/cudf/pull/12498 - ; + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -1530,12 +1530,14 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema) // List column expected auto leaf_child = float_wrapper{{0.0, 123.0}, {false, true}}; auto const validity = {1, 0, 0}; - auto expected = cudf::make_lists_column( + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(validity.begin(), validity.end()); + auto expected = cudf::make_lists_column( 3, int_wrapper{{0, 2, 2, 2}}.release(), cudf::test::structs_column_wrapper{{leaf_child}, {false, true}}.release(), - 2, - cudf::test::detail::make_null_mask(validity.begin(), validity.end())); + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), *expected); } diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu index 2170ce4a3e2..deb4309a9d9 100644 --- a/cpp/tests/io/json_type_cast_test.cu +++ b/cpp/tests/io/json_type_cast_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -78,7 +78,7 @@ TEST_F(JSONTypeCastTest, String) auto null_mask_it = no_nulls(); auto null_mask = - cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()); + std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size())); auto str_col = cudf::io::json::experimental::detail::parse_data( svs.data(), svs.size(), type, std::move(null_mask), default_json_options().view(), stream, mr); @@ -108,7 +108,7 @@ TEST_F(JSONTypeCastTest, Int) auto null_mask_it = no_nulls(); auto null_mask = - cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()); + std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size())); auto col = cudf::io::json::experimental::detail::parse_data( svs.data(), svs.size(), type, std::move(null_mask), default_json_options().view(), stream, mr); @@ -145,7 +145,7 @@ TEST_F(JSONTypeCastTest, StringEscapes) auto null_mask_it = no_nulls(); auto null_mask = - cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()); + std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size())); auto col = cudf::io::json::experimental::detail::parse_data( svs.data(), svs.size(), type, std::move(null_mask), default_json_options().view(), stream, mr); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 8f6ec0dc65c..ceda584b297 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1426,13 +1426,10 @@ TEST_F(OrcWriterTest, TestMap) } int32_col offsets(row_offsets.begin(), row_offsets.end()); - auto num_list_rows = static_cast(offsets).size() - 1; - auto list_col = - cudf::make_lists_column(num_list_rows, - offsets.release(), - std::move(s_col), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_list_rows)); + auto num_list_rows = static_cast(offsets).size() - 1; + auto [null_mask, null_count] = 
cudf::test::detail::make_null_mask(valids, valids + num_list_rows); + auto list_col = cudf::make_lists_column( + num_list_rows, offsets.release(), std::move(s_col), null_count, std::move(null_mask)); table_view expected({*list_col}); diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 7cb5eeab9dc..4cd3b1d8506 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -74,11 +74,11 @@ auto write_file(std::vector>& input_columns, cudf::size_type offset{0}; for (auto& col : input_columns) { - auto const null_mask_buff = + auto const [null_mask, null_count] = cudf::test::detail::make_null_mask(valid_iter + offset, valid_iter + col->size() + offset); col = cudf::structs::detail::superimpose_nulls( - static_cast(null_mask_buff.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, std::move(col), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index ce39fd2354f..4b4e613b34c 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -183,14 +183,11 @@ std::unique_ptr make_parquet_list_list_col( auto child = cudf::make_lists_column( child_offsets_size, child_offsets.release(), child_data.release(), 0, rmm::device_buffer{}); - int offsets_size = static_cast(offsets).size() - 1; + int offsets_size = static_cast(offsets).size() - 1; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + offsets_size); return include_validity ? 
cudf::make_lists_column( - offsets_size, - offsets.release(), - std::move(child), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + offsets_size)) + offsets_size, offsets.release(), std::move(child), null_count, std::move(null_mask)) : cudf::make_lists_column( offsets_size, offsets.release(), std::move(child), 0, rmm::device_buffer{}); } @@ -2704,12 +2701,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) c1_offset_iter, c1_offset_iter + num_rows + 1); cudf::test::fixed_width_column_wrapper c1_floats( values, values + (num_rows * floats_per_row), valids); - auto _c1 = cudf::make_lists_column(num_rows, - c1_offsets.release(), - c1_floats.release(), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_rows)); - auto c1 = cudf::purge_nonempty_nulls(*_c1); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto _c1 = cudf::make_lists_column( + num_rows, c1_offsets.release(), c1_floats.release(), null_count, std::move(null_mask)); + auto c1 = cudf::purge_nonempty_nulls(*_c1); // list> auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); @@ -2732,12 +2728,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; }); std::vector c3_valids(num_rows); std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin()); - auto _c3_list = - cudf::make_lists_column(num_rows, - offsets.release(), - string_col.release(), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_rows)); + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(valids, valids + num_rows); + auto _c3_list = cudf::make_lists_column( + num_rows, offsets.release(), string_col.release(), null_count, std::move(null_mask)); auto c3_list = cudf::purge_nonempty_nulls(*_c3_list); cudf::test::fixed_width_column_wrapper c3_ints(values, values + 
num_rows, valids); cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); diff --git a/cpp/tests/lists/combine/concatenate_rows_tests.cpp b/cpp/tests/lists/combine/concatenate_rows_tests.cpp index 49afe70ec07..0b4337e11e3 100644 --- a/cpp/tests/lists/combine/concatenate_rows_tests.cpp +++ b/cpp/tests/lists/combine/concatenate_rows_tests.cpp @@ -767,12 +767,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNulls) cudf::test::fixed_width_column_wrapper l0_offsets{0, 2, 2, 5, 6, 8}; auto const l0_size = static_cast(l0_offsets).size() - 1; std::vector l0_validity{false, true, true, false, true}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(l0_validity.begin(), l0_validity.end()); auto l0 = cudf::make_lists_column( - l0_size, - l0_offsets.release(), - s0.release(), - 2, - cudf::test::detail::make_null_mask(l0_validity.begin(), l0_validity.end())); + l0_size, l0_offsets.release(), s0.release(), null_count, std::move(null_mask)); // col1 cudf::test::fixed_width_column_wrapper s1_0{ @@ -799,12 +797,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNulls) cudf::test::fixed_width_column_wrapper l1_offsets{0, 0, 4, 7, 15, 15}; auto const l1_size = static_cast(l1_offsets).size() - 1; std::vector l1_validity{false, true, true, true, true}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(l1_validity.begin(), l1_validity.end()); auto l1 = cudf::make_lists_column( - l1_size, - l1_offsets.release(), - s1.release(), - 1, - cudf::test::detail::make_null_mask(l1_validity.begin(), l1_validity.end())); + l1_size, l1_offsets.release(), s1.release(), null_count, std::move(null_mask)); // concatenate_policy::IGNORE_NULLS { @@ -826,12 +822,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNulls) cudf::test::fixed_width_column_wrapper le_offsets{0, 0, 4, 10, 18, 20}; auto const le_size = static_cast(le_offsets).size() - 1; std::vector le_validity{false, true, true, true, true}; + 
std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end()); auto expected = cudf::make_lists_column( - le_size, - le_offsets.release(), - se.release(), - 1, - cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end())); + le_size, le_offsets.release(), se.release(), null_count, std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -856,12 +850,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNulls) cudf::test::fixed_width_column_wrapper le_offsets{0, 0, 4, 10, 10, 12}; auto const le_size = static_cast(le_offsets).size() - 1; std::vector le_validity{false, true, true, false, true}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end()); auto expected = cudf::make_lists_column( - le_size, - le_offsets.release(), - se.release(), - 2, - cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end())); + le_size, le_offsets.release(), se.release(), null_count, std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -882,12 +874,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNullsSliced) cudf::test::fixed_width_column_wrapper l0_offsets{0, 2, 2, 5, 6, 8}; auto const l0_size = static_cast(l0_offsets).size() - 1; std::vector l0_validity{false, true, false, false, true}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(l0_validity.begin(), l0_validity.end()); auto l0_unsliced = cudf::make_lists_column( - l0_size, - l0_offsets.release(), - s0.release(), - 3, - cudf::test::detail::make_null_mask(l0_validity.begin(), l0_validity.end())); + l0_size, l0_offsets.release(), s0.release(), null_count, std::move(null_mask)); auto l0 = cudf::split(*l0_unsliced, {2})[1]; // col1 @@ -915,12 +905,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNullsSliced) cudf::test::fixed_width_column_wrapper l1_offsets{0, 0, 4, 7, 15, 15}; auto const l1_size = 
static_cast(l1_offsets).size() - 1; std::vector l1_validity{false, true, false, true, true}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(l1_validity.begin(), l1_validity.end()); auto l1_unsliced = cudf::make_lists_column( - l1_size, - l1_offsets.release(), - s1.release(), - 2, - cudf::test::detail::make_null_mask(l1_validity.begin(), l1_validity.end())); + l1_size, l1_offsets.release(), s1.release(), null_count, std::move(null_mask)); auto l1 = cudf::split(*l1_unsliced, {2})[1]; // concatenate_policy::IGNORE_NULLS @@ -941,12 +929,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNullsSliced) cudf::test::fixed_width_column_wrapper le_offsets{0, 0, 8, 10}; auto const le_size = static_cast(le_offsets).size() - 1; std::vector le_validity{false, true, true}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end()); auto expected = cudf::make_lists_column( - le_size, - le_offsets.release(), - se.release(), - 1, - cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end())); + le_size, le_offsets.release(), se.release(), null_count, std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -968,12 +954,10 @@ TEST_F(ListConcatenateRowsNestedTypesTest, StructWithNullsSliced) cudf::test::fixed_width_column_wrapper le_offsets{0, 0, 0, 2}; auto const le_size = static_cast(le_offsets).size() - 1; std::vector le_validity{false, false, true}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end()); auto expected = cudf::make_lists_column( - le_size, - le_offsets.release(), - se.release(), - 2, - cudf::test::detail::make_null_mask(le_validity.begin(), le_validity.end())); + le_size, le_offsets.release(), se.release(), null_count, std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 
f592819dacb..05dca1d885e 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -209,7 +209,6 @@ TYPED_TEST(TypedContainsTest, ScalarKeyWithNullLists) TYPED_TEST(TypedContainsTest, SlicedLists) { // Test sliced List columns. - using namespace cudf; using T = TypeParam; auto search_space = cudf::test::lists_column_wrapper{{{0, 1, 2, 1}, @@ -335,12 +334,13 @@ TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists) {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); - auto search_space = cudf::make_lists_column( - 8, - indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - numerals.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column(8, + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + numerals.release(), + null_count, + std::move(null_mask)); // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key(1); @@ -377,12 +377,14 @@ TEST_F(ContainsTest, BoolScalarWithNullsInLists) auto numerals = cudf::test::fixed_width_column_wrapper{ {X, 1, 1, X, 1, 1, X, 1, 1, X, X, 1, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); - auto search_space = cudf::make_lists_column( + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column( 8, cudf::test::fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + null_count, + std::move(null_mask)); // Search space: [ [x], [1,1], [x,1,1,x], [], x, [1,1,x], [x], [1,1,x,1] ] auto search_key_one = 
create_scalar_search_key(1); @@ -420,12 +422,13 @@ TEST_F(ContainsTest, StringScalarWithNullsInLists) {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); - auto search_space = cudf::make_lists_column( - 8, - indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - strings.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column(8, + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + strings.release(), + null_count, + std::move(null_mask)); // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key("1"); @@ -658,13 +661,14 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists) {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column(8, + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + numerals.release(), + null_count, + std::move(null_mask)); - auto search_space = cudf::make_lists_column( - 8, - indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - numerals.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_keys = cudf::test::fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 1}; @@ -696,13 +700,14 @@ TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearc {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); + auto [null_mask, 
null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column(8, + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + numerals.release(), + null_count, + std::move(null_mask)); - auto search_space = cudf::make_lists_column( - 8, - indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - numerals.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_keys = @@ -735,13 +740,13 @@ TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys) {X, 0, 1, X, 1, 1, X, 1, 1, X, X, 0, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); - - auto search_space = cudf::make_lists_column( - 8, - indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - numerals.release(), - 1, - cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column(8, + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + numerals.release(), + null_count, + std::move(null_mask)); auto search_keys = cudf::test::fixed_width_column_wrapper{{0, 1, 0, X, 0, 0, 1, 1}, null_at(3)}; @@ -773,12 +778,14 @@ TEST_F(ContainsTest, StringKeyVectorWithNullsInListsAndInSearchKeys) {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); - auto search_space = cudf::make_lists_column( + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8); + auto search_space = cudf::make_lists_column( 8, cudf::test::fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), strings.release(), - 1, - 
cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + null_count, + std::move(null_mask)); auto search_keys = cudf::test::strings_column_wrapper{{"1", "2", "3", "X", "2", "3", "1", "1"}, null_at(3)}; @@ -1202,12 +1209,10 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyWithNullLists) // clang-format on auto child = cudf::test::structs_column_wrapper{{data1, data2}}; auto const validity_iter = nulls_at({3, 10}); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11); return cudf::make_lists_column( - 11, - offsets.release(), - child.release(), - 2, - cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11)); + 11, offsets.release(), child.release(), null_count, std::move(null_mask)); }(); auto const key = [] { @@ -1516,12 +1521,10 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsHavingNulls) // clang-format on auto child = cudf::test::structs_column_wrapper{{data1, data2}, nulls_at({1, 10, 15, 24})}; auto const validity_iter = nulls_at({3, 10}); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11); return cudf::make_lists_column( - 11, - offsets.release(), - child.release(), - 2, - cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11)); + 11, offsets.release(), child.release(), null_count, std::move(null_mask)); }(); auto const keys_original = [] { diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp index 7b2719196f9..bb52b6ae90d 100644 --- a/cpp/tests/lists/explode_tests.cpp +++ b/cpp/tests/lists/explode_tests.cpp @@ -401,11 +401,9 @@ TEST_F(ExplodeTest, ListOfStructsWithEmpties) std::vector> s2_cols; s2_cols.push_back(i2.release()); std::vector r2_valids{false}; - auto s2 = cudf::make_structs_column( - 1, - std::move(s2_cols), - 1, - cudf::test::detail::make_null_mask(r2_valids.begin(), r2_valids.end())); + auto [null_mask, null_count] = + 
cudf::test::detail::make_null_mask(r2_valids.begin(), r2_valids.end()); + auto s2 = cudf::make_structs_column(1, std::move(s2_cols), null_count, std::move(null_mask)); cudf::test::fixed_width_column_wrapper off2{0, 1}; auto row2 = cudf::make_lists_column(1, off2.release(), std::move(s2), 0, rmm::device_buffer{}); @@ -424,12 +422,10 @@ TEST_F(ExplodeTest, ListOfStructsWithEmpties) auto s4 = cudf::make_structs_column(0, std::move(s4_cols), 0, rmm::device_buffer{}); cudf::test::fixed_width_column_wrapper off4{0, 0}; std::vector r4_valids{false}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(r4_valids.begin(), r4_valids.end()); auto row4 = - cudf::make_lists_column(1, - off4.release(), - std::move(s4), - 1, - cudf::test::detail::make_null_mask(r4_valids.begin(), r4_valids.end())); + cudf::make_lists_column(1, off4.release(), std::move(s4), null_count, std::move(null_mask)); // concatenated auto final_col = @@ -1048,11 +1044,9 @@ TEST_F(ExplodeOuterTest, ListOfStructsWithEmpties) std::vector> s2_cols; s2_cols.push_back(i2.release()); std::vector r2_valids{false}; - auto s2 = cudf::make_structs_column( - 1, - std::move(s2_cols), - 1, - cudf::test::detail::make_null_mask(r2_valids.begin(), r2_valids.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(r2_valids.begin(), r2_valids.end()); + auto s2 = cudf::make_structs_column(1, std::move(s2_cols), null_count, std::move(null_mask)); cudf::test::fixed_width_column_wrapper off2{0, 1}; auto row2 = cudf::make_lists_column(1, off2.release(), std::move(s2), 0, rmm::device_buffer{}); @@ -1071,12 +1065,10 @@ TEST_F(ExplodeOuterTest, ListOfStructsWithEmpties) auto s4 = cudf::make_structs_column(0, std::move(s4_cols), 0, rmm::device_buffer{}); cudf::test::fixed_width_column_wrapper off4{0, 0}; std::vector r4_valids{false}; + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(r4_valids.begin(), r4_valids.end()); auto row4 = - cudf::make_lists_column(1, - 
off4.release(), - std::move(s4), - 1, - cudf::test::detail::make_null_mask(r4_valids.begin(), r4_valids.end())); + cudf::make_lists_column(1, off4.release(), std::move(s4), null_count, std::move(null_mask)); // concatenated auto final_col = diff --git a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp index 17265326fde..fa1d732e2fc 100644 --- a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -151,15 +151,16 @@ TYPED_TEST(ApplyBooleanMaskTypedTest, StructInput) auto constexpr num_input_rows = 7; auto const input = [] { - auto child_num = fwcw{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - auto child_str = strings{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; - auto const null_mask_begin = null_at(5); - auto const null_mask_end = null_mask_begin + num_input_rows; + auto child_num = fwcw{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto child_str = strings{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; + auto const null_mask_begin = null_at(5); + auto const null_mask_end = null_mask_begin + num_input_rows; + auto [null_mask, null_count] = detail::make_null_mask(null_mask_begin, null_mask_end); return cudf::make_lists_column(num_input_rows, offsets{0, 2, 3, 6, 6, 8, 8, 10}.release(), structs_column_wrapper{{child_num, child_str}}.release(), - 1, - detail::make_null_mask(null_mask_begin, null_mask_end)); + null_count, + std::move(null_mask)); }(); { // Unsliced. 
@@ -168,15 +169,16 @@ TYPED_TEST(ApplyBooleanMaskTypedTest, StructInput) auto const filter = filter_t{{1, 1}, {0}, {0, 1, 0}, {}, {1, 0}, {}, {0, 1}}; auto const result = apply_boolean_mask(lists_column_view{*input}, lists_column_view{filter}); auto const expected = [] { - auto child_num = fwcw{0, 1, 4, 6, 9}; - auto child_str = strings{"0", "1", "4", "6", "9"}; - auto const null_mask_begin = null_at(5); - auto const null_mask_end = null_mask_begin + num_input_rows; + auto child_num = fwcw{0, 1, 4, 6, 9}; + auto child_str = strings{"0", "1", "4", "6", "9"}; + auto const null_mask_begin = null_at(5); + auto const null_mask_end = null_mask_begin + num_input_rows; + auto [null_mask, null_count] = detail::make_null_mask(null_mask_begin, null_mask_end); return cudf::make_lists_column(num_input_rows, offsets{0, 2, 2, 3, 3, 4, 4, 5}.release(), structs_column_wrapper{{child_num, child_str}}.release(), - 1, - detail::make_null_mask(null_mask_begin, null_mask_end)); + null_count, + std::move(null_mask)); }(); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -189,15 +191,16 @@ TYPED_TEST(ApplyBooleanMaskTypedTest, StructInput) auto const result = apply_boolean_mask(lists_column_view{sliced_input}, lists_column_view{filter}); auto const expected = [] { - auto child_num = fwcw{4, 6, 9}; - auto child_str = strings{"4", "6", "9"}; - auto const null_mask_begin = null_at(4); - auto const null_mask_end = null_mask_begin + num_input_rows; + auto child_num = fwcw{4, 6, 9}; + auto child_str = strings{"4", "6", "9"}; + auto const null_mask_begin = null_at(4); + auto const null_mask_end = null_mask_begin + num_input_rows; + auto [null_mask, null_count] = detail::make_null_mask(null_mask_begin, null_mask_end); return cudf::make_lists_column(num_input_rows - 1, offsets{0, 0, 1, 1, 2, 2, 3}.release(), structs_column_wrapper{{child_num, child_str}}.release(), - 1, - detail::make_null_mask(null_mask_begin, null_mask_end)); + null_count, + std::move(null_mask)); }(); 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index 819b342ff8f..82f03e5aefb 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -261,9 +261,8 @@ void grouped_test(cudf::data_type input_type, std::vector> p std::pair make_null_mask(cudf::column_view const& col) { - auto itr = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - auto mask = cudf::test::detail::make_null_mask(itr, itr + col.size()); - return std::make_pair(std::move(mask), col.size() / 2); + auto itr = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + return cudf::test::detail::make_null_mask(itr, itr + col.size()); } void simple_with_nulls_test(cudf::data_type input_type, std::vector> params) @@ -388,12 +387,13 @@ TEST_F(PercentileApproxTest, EmptyInput) cudf::test::fixed_width_column_wrapper offsets{0, 0, 0, 0}; std::vector nulls{0, 0, 0}; - auto expected = - cudf::make_lists_column(3, - offsets.release(), - cudf::make_empty_column(cudf::type_id::FLOAT64), - 3, - cudf::test::detail::make_null_mask(nulls.begin(), nulls.end())); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()); + + auto expected = cudf::make_lists_column(3, + offsets.release(), + cudf::make_empty_column(cudf::type_id::FLOAT64), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -419,13 +419,13 @@ TEST_F(PercentileApproxTest, EmptyPercentiles) cudf::test::fixed_width_column_wrapper offsets{0, 0, 0}; std::vector nulls{0, 0}; - auto expected = - cudf::make_lists_column(2, - offsets.release(), - cudf::make_empty_column(cudf::type_id::FLOAT64), - 2, - cudf::test::detail::make_null_mask(nulls.begin(), nulls.end())); - // cudf::detail::create_null_mask(2, cudf::mask_state::ALL_NULL, 
cudf::get_default_stream())); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()); + + auto expected = cudf::make_lists_column(2, + offsets.release(), + cudf::make_empty_column(cudf::type_id::FLOAT64), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index 5f3ab1636ef..68da95fbb12 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -128,13 +128,13 @@ TEST_F(ListRankScanTest, ListOfStruct) 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; auto list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - auto nullmask_buf = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); auto list_column = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, struct_col}); @@ -179,21 +179,17 @@ TEST_F(ListRankScanTest, ListOfEmptyStruct) // [{}, {}] auto struct_validity = std::vector{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}; - auto struct_validity_buffer = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end()); - auto struct_col = - cudf::make_structs_column(14, {}, cudf::UNKNOWN_NULL_COUNT, std::move(struct_validity_buffer)); + auto struct_col = cudf::make_structs_column(14, {}, null_count, std::move(null_mask)); auto offsets = cudf::test::fixed_width_column_wrapper{ 0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14}; auto 
list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - auto list_validity_buffer = + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto list_column = cudf::make_lists_column(13, - offsets.release(), - std::move(struct_col), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_column = cudf::make_lists_column( + 13, offsets.release(), std::move(struct_col), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6}; @@ -218,13 +214,10 @@ TEST_F(ListRankScanTest, EmptyDeepList) auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0}; auto list_nullmask = std::vector{1, 1, 0, 0}; - auto list_validity_buffer = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto list_column = cudf::make_lists_column(4, - offsets.release(), - list1.release(), - cudf::UNKNOWN_NULL_COUNT, - std::move(list_validity_buffer)); + auto list_column = cudf::make_lists_column( + 4, offsets.release(), list1.release(), null_count, std::move(null_mask)); auto expect = cudf::test::fixed_width_column_wrapper{1, 1, 2, 2}; diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 8167b10dfee..a86656b70a6 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -231,8 +231,9 @@ TYPED_TEST(TypedRankScanTest, StructsWithNullPushdown) // the ranks are still correct. 
{ auto const null_iter = cudf::test::iterators::nulls_at({1, 2}); - struct_col->set_null_mask( - cudf::test::detail::make_null_mask(null_iter, null_iter + struct_col->size())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_iter, null_iter + struct_col->size()); + struct_col->set_null_mask(std::move(null_mask), null_count); auto const expected_dense = rank_result_col{1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 8, 9}; auto const expected_rank = rank_result_col{1, 2, 2, 4, 5, 6, 7, 7, 9, 10, 10, 12}; auto const expected_percent = percent_result_col{0.0, diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index 309e8341bcf..a761e6afd18 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -57,13 +57,14 @@ TEST_F(ByteCastTest, int16ValuesWithNulls) cudf::test::fixed_width_column_wrapper const int16_col( {short(0), short(100), short(-100), limits::min(), limits::max()}, {0, 1, 0, 1, 0}); - auto int16_data = cudf::test::fixed_width_column_wrapper{0x00, 0x64, 0x80, 0x00}; - auto int16_expected = cudf::make_lists_column( + auto int16_data = cudf::test::fixed_width_column_wrapper{0x00, 0x64, 0x80, 0x00}; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); + auto int16_expected = cudf::make_lists_column( 5, std::move(cudf::test::fixed_width_column_wrapper{0, 0, 2, 2, 4, 4}.release()), std::move(int16_data.release()), - 3, - cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); + null_count, + std::move(null_mask)); auto const output_int16 = cudf::byte_cast(int16_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int16->view(), int16_expected->view()); @@ -103,12 +104,15 @@ TEST_F(ByteCastTest, int32ValuesWithNulls) auto int32_data = cudf::test::fixed_width_column_wrapper{ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x9c, 0x7f, 0xff, 0xff, 0xff}; + auto [null_mask, null_count] = + 
cudf::test::detail::make_null_mask(even_validity, even_validity + 5); + auto int32_expected = cudf::make_lists_column( 5, std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), std::move(int32_data.release()), - 2, - cudf::test::detail::make_null_mask(even_validity, even_validity + 5)); + null_count, + std::move(null_mask)); auto const output_int32 = cudf::byte_cast(int32_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int32->view(), int32_expected->view()); @@ -155,13 +159,14 @@ TEST_F(ByteCastTest, int64ValuesWithNulls) auto int64_data = cudf::test::fixed_width_column_wrapper{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - auto int64_expected = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); + auto int64_expected = cudf::make_lists_column( 5, std::move( cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), std::move(int64_data.release()), - 3, - cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); + null_count, + std::move(null_mask)); auto const output_int64 = cudf::byte_cast(int64_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int64->view(), int64_expected->view()); @@ -215,12 +220,14 @@ TEST_F(ByteCastTest, fp32ValuesWithNulls) auto fp32_data = cudf::test::fixed_width_column_wrapper{ 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc8, 0x00, 0x00, 0x7f, 0x7f, 0xff, 0xff}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(even_validity, even_validity + 5); auto fp32_expected = cudf::make_lists_column( 5, std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), std::move(fp32_data.release()), - 2, - cudf::test::detail::make_null_mask(even_validity, even_validity + 5)); + null_count, + std::move(null_mask)); auto const output_fp32 = cudf::byte_cast(fp32_col, cudf::flip_endianness::YES); 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp32->view(), fp32_expected->view()); @@ -284,13 +291,14 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls) auto fp64_data = cudf::test::fixed_width_column_wrapper{ 0x40, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - auto fp64_expected = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); + auto fp64_expected = cudf::make_lists_column( 5, std::move( cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), std::move(fp64_data.release()), - 3, - cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); + null_count, + std::move(null_mask)); auto const output_fp64 = cudf::byte_cast(fp64_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp64->view(), fp64_expected->view()); @@ -340,8 +348,9 @@ TEST_F(ByteCastTest, StringValuesWithNulls) // Set nulls by `set_null_mask` so the output column will have non-empty nulls. // This is intentional. 
auto const null_iter = cudf::test::iterators::nulls_at({2, 4}); - output->set_null_mask(cudf::test::detail::make_null_mask(null_iter, null_iter + output->size()), - 2); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_iter, null_iter + output->size()); + output->set_null_mask(std::move(null_mask), null_count); return output; }(); diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 56a9b2c45cc..c52f990ec46 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -268,12 +268,14 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::size_type{0}, [expected_num_rows](auto i) { return i != 0 && i != (expected_num_rows - 1); }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 2, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); } @@ -301,12 +303,14 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::size_type{0}, [expected_num_rows](auto i) { return i != 0 && i != (expected_num_rows - 1); }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 2, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = 
cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); } @@ -336,12 +340,14 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) auto null_mask_iter = cudf::detail::make_counting_transform_iterator( cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 3, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); } @@ -369,12 +375,14 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) auto null_mask_iter = cudf::detail::make_counting_transform_iterator( cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 3, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); } @@ -476,12 +484,14 @@ TEST_F(CollectListTest, 
RollingWindowHonoursMinPeriodsWithDecimal) cudf::size_type{0}, [expected_num_rows](auto i) { return i != 0 && i != (expected_num_rows - 1); }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 2, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); @@ -519,12 +529,14 @@ TEST_F(CollectListTest, RollingWindowHonoursMinPeriodsWithDecimal) auto null_mask_iter = cudf::detail::make_counting_transform_iterator( cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 3, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); @@ -1251,13 +1263,13 @@ TYPED_TEST(TypedCollectListTest, GroupedTimeRangeRollingWindowOnStructsWithMinPe .release(); auto expected_validity_iter = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i < 5; }); - auto expected_null_mask = + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(expected_validity_iter, expected_validity_iter + 9); auto expected_result = 
cudf::make_lists_column(9, std::move(expected_offsets_column), std::move(expected_structs_column), - 4, - std::move(expected_null_mask)); + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); @@ -1599,12 +1611,14 @@ TEST_F(CollectSetTest, RollingWindowHonoursMinPeriodsWithDecimal) cudf::size_type{0}, [expected_num_rows](auto i) { return i != 0 && i != (expected_num_rows - 1); }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 2, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); @@ -1642,12 +1656,14 @@ TEST_F(CollectSetTest, RollingWindowHonoursMinPeriodsWithDecimal) auto null_mask_iter = cudf::detail::make_counting_transform_iterator( cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); - auto expected_result = cudf::make_lists_column( - expected_num_rows, - std::move(expected_offsets), - expected_result_child.release(), - 3, - cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows)); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); + + auto expected_result = cudf::make_lists_column(expected_num_rows, + std::move(expected_offsets), + expected_result_child.release(), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result->view()); diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 
7c37523fae2..ddb1b0209f2 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -311,8 +311,9 @@ TYPED_TEST(Sort, WithNullableStructColumn) auto make_struct = [&](std::vector> child_cols, std::vector nulls) { cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); - auto struct_ = struct_col.release(); - struct_->set_null_mask(cudf::test::detail::make_null_mask(nulls.begin(), nulls.end())); + auto struct_ = struct_col.release(); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()); + struct_->set_null_mask(std::move(null_mask), null_count); return struct_; }; diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 3622f6400cb..0b29d9c075e 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -881,14 +881,16 @@ TEST_F(DistinctKeepAny, ListsOfStructs) return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; }(); - auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; - auto const null_it = nulls_at({2, 3}); - auto const nullmask_buf = cudf::test::detail::make_null_mask(null_it, null_it + 17); - auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const null_it = nulls_at({2, 3}); + + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17); + + auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, structs}); @@ -965,14 +967,16 @@ TEST_F(DistinctKeepFirstLastNone, ListsOfStructs) return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; }(); - auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; - auto const null_it = nulls_at({2, 3}); - auto const nullmask_buf = cudf::test::detail::make_null_mask(null_it, null_it + 17); - auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const null_it = nulls_at({2, 3}); + + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17); + + auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, structs}); @@ -1059,14 +1063,16 @@ TEST_F(DistinctKeepAny, SlicedListsOfStructs) return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; }(); - auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 
12, 14, 15, 16, 17, 18}; - auto const null_it = nulls_at({2, 3}); - auto const nullmask_buf = cudf::test::detail::make_null_mask(null_it, null_it + 17); - auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const null_it = nulls_at({2, 3}); + + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17); + + auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, structs}); @@ -1113,27 +1119,25 @@ TEST_F(DistinctKeepAny, ListsOfEmptyStructs) // 12. [{}, {}] auto const structs_null_it = nulls_at({0, 1, 2, 3, 4, 5, 6, 7}); - auto const structs_nullmask_buf = + auto [structs_null_mask, structs_null_count] = cudf::test::detail::make_null_mask(structs_null_it, structs_null_it + 14); auto const structs = cudf::column_view(cudf::data_type(cudf::type_id::STRUCT), 14, nullptr, - static_cast(structs_nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, - 0, - {}); + static_cast(structs_null_mask.data()), + structs_null_count); auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14}; auto const lists_null_it = nulls_at({2, 3}); - auto const lists_nullmask_buf = + auto [lists_null_mask, lists_null_count] = cudf::test::detail::make_null_mask(lists_null_it, lists_null_it + 13); auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 13, nullptr, - static_cast(lists_nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(lists_null_mask.data()), + lists_null_count, 0, {offsets, structs}); diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp index 5191f51bbb7..afa932f07e1 100644 --- a/cpp/tests/stream_compaction/unique_tests.cpp +++ b/cpp/tests/stream_compaction/unique_tests.cpp @@ 
-467,14 +467,15 @@ TEST_F(Unique, ListsOfStructsKeepAny) return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; }(); - auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; - auto const null_it = nulls_at({2, 3}); - auto const nullmask_buf = cudf::test::detail::make_null_mask(null_it, null_it + 17); - auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const null_it = nulls_at({2, 3}); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17); + + auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, structs}); @@ -549,14 +550,15 @@ TEST_F(Unique, ListsOfStructsKeepFirstLastNone) return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; }(); - auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; - auto const null_it = nulls_at({2, 3}); - auto const nullmask_buf = cudf::test::detail::make_null_mask(null_it, null_it + 17); - auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const null_it = nulls_at({2, 3}); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17); + + auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 17, nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(null_mask.data()), + null_count, 0, {offsets, structs}); @@ -609,27 +611,25 @@ TEST_F(Unique, ListsOfEmptyStructsKeepAny) // 12. 
[{}, {}] auto const structs_null_it = nulls_at({0, 1, 2, 3, 4, 5, 6, 7}); - auto const structs_nullmask_buf = + auto [structs_null_mask, structs_null_count] = cudf::test::detail::make_null_mask(structs_null_it, structs_null_it + 14); auto const structs = cudf::column_view(cudf::data_type(cudf::type_id::STRUCT), 14, nullptr, - static_cast(structs_nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, - 0, - {}); + static_cast(structs_null_mask.data()), + structs_null_count); auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14}; auto const lists_null_it = nulls_at({2, 3}); - auto const lists_nullmask_buf = + auto [lists_null_mask, lists_null_count] = cudf::test::detail::make_null_mask(lists_null_it, lists_null_it + 13); auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST), 13, nullptr, - static_cast(lists_nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, + static_cast(lists_null_mask.data()), + lists_null_count, 0, {offsets, structs}); diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index 54158e486f3..981b5d426f1 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -447,12 +447,14 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) auto list_of_struct_of_list_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto [null_mask, null_count] = + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5); auto list_of_struct_of_list = cudf::make_lists_column( 5, std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(struct_of_lists_col), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); + null_count, + std::move(null_mask)); // Compare with expected values. 
@@ -465,12 +467,14 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) CUDF_TEST_EXPECT_COLUMNS_EQUAL(cudf::lists_column_view(*list_of_struct_of_list).child(), *expected_level2_struct); + std::tie(null_mask, null_count) = + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5); auto expected_level3_list = cudf::make_lists_column( 5, std::move(fixed_width_column_wrapper{0, 0, 2, 4, 4, 6}.release()), std::move(expected_level2_struct), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*list_of_struct_of_list, *expected_level3_list); } @@ -493,12 +497,14 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) auto list_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto [null_mask, null_count] = detail::make_null_mask(list_validity, list_validity + 5); + auto lists_col = cudf::make_lists_column( 5, std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(structs_col), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_validity, list_validity + 5)); + null_count, + std::move(null_mask)); std::vector> cols; cols.push_back(std::move(lists_col)); @@ -512,12 +518,14 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) auto expected_structs_col = structs_column_wrapper{{expected_ints_col}, {1, 1, 1, 1, 1, 1, 0, 0, 0, 0}}.release(); + std::tie(null_mask, null_count) = detail::make_null_mask(list_validity, list_validity + 5); + auto expected_lists_col = cudf::make_lists_column( 5, std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(expected_structs_col), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_validity, list_validity + 5)); + null_count, + std::move(null_mask)); // Test that the lists child column is as expected. 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_lists_col, struct_of_list_of_struct->child(0)); @@ -625,9 +633,11 @@ TEST_F(StructColumnWrapperTest, TestStructsColumnWithEmptyChild) int num_rows{empty_col->size()}; vector_of_columns cols; cols.push_back(std::move(empty_col)); - auto mask_vec = std::vector{true, false, false}; - auto mask = cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end()); - auto structs_col = cudf::make_structs_column(num_rows, std::move(cols), 2, std::move(mask)); + auto mask_vec = std::vector{true, false, false}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end()); + auto structs_col = + cudf::make_structs_column(num_rows, std::move(cols), null_count, std::move(null_mask)); EXPECT_NO_THROW(structs_col->view()); } diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index 798f2b4935e..b7950052cf9 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,9 +38,9 @@ struct MaskToNullTest : public cudf::test::BaseFixture { cudf::test::fixed_width_column_wrapper expected( sample, sample + input.size(), input.begin()); - auto got_mask = cudf::bools_to_mask(input_column); + auto [null_mask, null_count] = cudf::bools_to_mask(input_column); cudf::column got_column(expected); - got_column.set_null_mask(std::move(*(got_mask.first))); + got_column.set_null_mask(std::move(*null_mask), null_count); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got_column.view()); } @@ -53,9 +53,9 @@ struct MaskToNullTest : public cudf::test::BaseFixture { cudf::test::fixed_width_column_wrapper expected( sample, sample + input.size(), input.begin()); - auto got_mask = cudf::bools_to_mask(input_column); + auto [null_mask, null_count] = cudf::bools_to_mask(input_column); cudf::column got_column(expected); - got_column.set_null_mask(std::move(*(got_mask.first))); + got_column.set_null_mask(std::move(*null_mask), null_count); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got_column.view()); } diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 615e948f545..15577469331 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -145,12 +145,10 @@ TYPED_TEST(RowBitCountTyped, ListsWithNulls) {1, 1, 1, 0, 1, 1, 0, 1, 0}}; cudf::test::fixed_width_column_wrapper inner_offsets{0, 2, 5, 6, 9, 9}; std::vector inner_list_validity{1, 1, 1, 1, 0}; + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(inner_list_validity.begin(), inner_list_validity.end()); auto inner_list = cudf::make_lists_column( - 5, - inner_offsets.release(), - values.release(), - 1, - cudf::test::detail::make_null_mask(inner_list_validity.begin(), inner_list_validity.end())); + 5, inner_offsets.release(), values.release(), null_count, std::move(null_mask)); cudf::test::fixed_width_column_wrapper outer_offsets{0, 2, 2, 3, 5}; auto list = cudf::make_lists_column(4, 
outer_offsets.release(), std::move(inner_list), 0, {}); diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp b/cpp/tests/utilities_tests/column_utilities_tests.cpp index 60f2ac768ec..236483f45fa 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp @@ -111,7 +111,7 @@ TYPED_TEST(ColumnUtilitiesTest, NullableToHostWithOffset) EXPECT_TRUE(std::equal(expected_data.begin(), expected_data.end(), host_data.first.begin())); - auto masks = cudf::test::detail::make_null_mask_vector(valid + split, valid + size); + auto masks = std::get<0>(cudf::test::detail::make_null_mask_vector(valid + split, valid + size)); EXPECT_TRUE(cudf::test::validate_host_masks(masks, host_data.second, expected_data.size())); } @@ -132,7 +132,7 @@ TYPED_TEST(ColumnUtilitiesTest, NullableToHostAllValid) EXPECT_TRUE(std::equal(data.begin(), data.end(), host_data.first.begin())); - auto masks = cudf::test::detail::make_null_mask_vector(all_valid, all_valid + size); + auto masks = std::get<0>(cudf::test::detail::make_null_mask_vector(all_valid, all_valid + size)); EXPECT_TRUE(cudf::test::validate_host_masks(masks, host_data.second, size)); } @@ -425,19 +425,20 @@ TEST_F(ColumnUtilitiesListsTest, DifferentPhysicalStructureBeforeConstruction) cudf::test::fixed_width_column_wrapper c0_offsets{0, 3, 6, 8, 11, 14, 16, 19}; cudf::test::fixed_width_column_wrapper c0_data{ 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7}; - auto c0 = make_lists_column(7, - c0_offsets.release(), - c0_data.release(), - 5, - cudf::test::detail::make_null_mask(valids.begin(), valids.end())); + + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids.begin(), valids.end()); + + auto c0 = make_lists_column( + 7, c0_offsets.release(), c0_data.release(), null_count, std::move(null_mask)); cudf::test::fixed_width_column_wrapper c1_offsets{0, 0, 0, 2, 2, 5, 5, 5}; cudf::test::fixed_width_column_wrapper c1_data{3, 3, 5, 5, 5}; - 
auto c1 = make_lists_column(7, - c1_offsets.release(), - c1_data.release(), - 5, - cudf::test::detail::make_null_mask(valids.begin(), valids.end())); + auto c1 = make_lists_column( + 7, + c1_offsets.release(), + c1_data.release(), + null_count, + std::get<0>(cudf::test::detail::make_null_mask(valids.begin(), valids.end()))); // properties CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(*c0, *c1); @@ -458,18 +459,16 @@ TEST_F(ColumnUtilitiesListsTest, DifferentPhysicalStructureBeforeConstruction) 1, 1, 10, 20, 30, 1, 1, 40, 50, 60, 70, 80, 90, 100}; cudf::test::structs_column_wrapper c0_l2_data({c0_l3_ints, c0_l3_floats}); std::vector c0_l2_valids = {1, 1, 1, 0, 0, 1, 1}; - auto c0_l2 = make_lists_column( - 7, - c0_l2_offsets.release(), - c0_l2_data.release(), - 2, - cudf::test::detail::make_null_mask(c0_l2_valids.begin(), c0_l2_valids.end())); + + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(c0_l2_valids.begin(), c0_l2_valids.end()); + auto c0_l2 = make_lists_column( + 7, c0_l2_offsets.release(), c0_l2_data.release(), null_count, std::move(null_mask)); + + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(level1_valids.begin(), level1_valids.end()); auto c0 = make_lists_column( - 7, - c0_l1_offsets.release(), - std::move(c0_l2), - 5, - cudf::test::detail::make_null_mask(level1_valids.begin(), level1_valids.end())); + 7, c0_l1_offsets.release(), std::move(c0_l2), null_count, std::move(null_mask)); cudf::test::fixed_width_column_wrapper c1_l1_offsets{0, 0, 0, 2, 2, 5, 5, 5}; cudf::test::fixed_width_column_wrapper c1_l2_offsets{0, 3, 3, 3, 6, 10}; @@ -478,18 +477,16 @@ TEST_F(ColumnUtilitiesListsTest, DifferentPhysicalStructureBeforeConstruction) 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; cudf::test::structs_column_wrapper c1_l2_data({c1_l3_ints, c1_l3_floats}); std::vector c1_l2_valids = {1, 0, 0, 1, 1}; - auto c1_l2 = make_lists_column( - 5, - c1_l2_offsets.release(), - c1_l2_data.release(), - 2, - 
cudf::test::detail::make_null_mask(c1_l2_valids.begin(), c1_l2_valids.end())); + + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(c1_l2_valids.begin(), c1_l2_valids.end()); + auto c1_l2 = make_lists_column( + 5, c1_l2_offsets.release(), c1_l2_data.release(), null_count, std::move(null_mask)); + + std::tie(null_mask, null_count) = + cudf::test::detail::make_null_mask(level1_valids.begin(), level1_valids.end()); auto c1 = make_lists_column( - 7, - c1_l1_offsets.release(), - std::move(c1_l2), - 5, - cudf::test::detail::make_null_mask(level1_valids.begin(), level1_valids.end())); + 7, c1_l1_offsets.release(), std::move(c1_l2), null_count, std::move(null_mask)); // properties CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(*c0, *c1); diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index 387dd4c0714..478e3e21052 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -1393,12 +1393,13 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructsWithValidity) cudf::test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); auto list_null_mask = {1, 1, 0}; auto num_lists = lists_column_offsets->size() - 1; - auto lists_column = make_lists_column( - num_lists, - std::move(lists_column_offsets), - std::move(struct_column), - 1, - cudf::test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()); + auto lists_column = cudf::make_lists_column(num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + null_count, + std::move(null_mask)); // Check if child column is unchanged. 
@@ -1465,24 +1466,27 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructsWithValidity) cudf::test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); auto num_lists = lists_column_offsets->size() - 1; auto list_null_mask = {1, 1, 0}; - auto lists_column = make_lists_column( - num_lists, - std::move(lists_column_offsets), - std::move(struct_column), - 1, - cudf::test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()); + auto lists_column = cudf::make_lists_column(num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + null_count, + std::move(null_mask)); auto lists_of_lists_column_offsets = cudf::test::fixed_width_column_wrapper{0, 2, 3}.release(); auto num_lists_of_lists = lists_of_lists_column_offsets->size() - 1; auto list_of_lists_null_mask = {1, 0}; + + std::tie(null_mask, null_count) = cudf::test::detail::make_null_mask( + list_of_lists_null_mask.begin(), list_of_lists_null_mask.end()); auto lists_of_lists_of_structs_column = - make_lists_column(num_lists_of_lists, - std::move(lists_of_lists_column_offsets), - std::move(lists_column), - 1, - cudf::test::detail::make_null_mask(list_of_lists_null_mask.begin(), - list_of_lists_null_mask.end())); + cudf::make_lists_column(num_lists_of_lists, + std::move(lists_of_lists_column_offsets), + std::move(lists_column), + null_count, + std::move(null_mask)); // Check if child column is unchanged.