diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 215ad50aed6..965fea84860 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,5 +100,15 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @copydoc cudf::segmented_row_bit_count + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 412fe17ef26..49ec3d7c0d5 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -224,5 +224,28 @@ std::unique_ptr row_bit_count( table_view const& t, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for + * each segment of rows. + * + * This is similar to counting bit size per row for the input table in `cudf::row_bit_count`, + * except that row sizes are accumulated by segments. + * + * Currently, only fixed-length segments are supported. In case the input table has number of rows + * not divisible by `segment_length`, its last segment is considered as shorter than the others. + * + * @throw std::invalid_argument if the input `segment_length` is non-positive or larger than the + * number of rows in the input table. + * + * @param t The table view to perform the computation on + * @param segment_length The number of rows in each segment for which the total size is computed + * @param mr Device memory resource used to allocate the returned columns' device memory + * @return A 32-bit integer column containing the bit counts for each segment of rows + */ +std::unique_ptr segmented_row_bit_count( + table_view const& t, + size_type segment_length, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index eda8ec7a463..78bd558501b 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,10 @@ #include #include -#include +#include +#include #include +#include namespace cudf { namespace detail { @@ -398,26 +401,32 @@ __device__ size_type row_size_functor::operator()(column_device_vie * @param cols An span of column_device_views representing a column hierarchy * @param info An span of column_info structs corresponding the elements in `cols` * @param output Output span of size (# rows) where per-row bit sizes are stored + * @param segment_length The number of rows in each segment for which the total size is computed * @param max_branch_depth Maximum depth of the span stack needed per-thread */ -CUDF_KERNEL void compute_row_sizes(device_span cols, - device_span info, - device_span output, - size_type max_branch_depth) +CUDF_KERNEL void compute_segment_sizes(device_span cols, + device_span info, + device_span output, + size_type segment_length, + size_type max_branch_depth) { extern __shared__ row_span thread_branch_stacks[]; int const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto const num_rows = output.size(); - if (tid >= num_rows) { return; } + auto const num_segments = static_cast(output.size()); + if (tid >= num_segments) { return; } // my_branch_stack points to the last span prior to branching. a branch occurs only // when we are inside of a list contained within a struct column. row_span* my_branch_stack = thread_branch_stacks + (threadIdx.x * max_branch_depth); size_type branch_depth{0}; - // current row span - always starts at 1 row. - row_span cur_span{tid, tid + 1}; + // current row span - always starts at spanning over `segment_length` rows. + auto const num_rows = cols[0].size(); + auto const get_default_row_span = [=] { + return row_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)}; + }; + auto cur_span = get_default_row_span(); // output size size_type& size = output[tid]; @@ -444,7 +453,7 @@ CUDF_KERNEL void compute_row_sizes(device_span cols, if (info[idx].depth == 0) { branch_depth = 0; last_branch_depth = 0; - cur_span = row_span{tid, tid + 1}; + cur_span = get_default_row_span(); } // add the contributing size of this row @@ -465,17 +474,18 @@ CUDF_KERNEL void compute_row_sizes(device_span cols, } // anonymous namespace -/** - * @copydoc cudf::detail::row_bit_count - * - */ -std::unique_ptr row_bit_count(table_view const& t, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - // no rows + // If there is no rows, segment_length will not be checked. if (t.num_rows() <= 0) { return cudf::make_empty_column(type_id::INT32); } + CUDF_EXPECTS(segment_length >= 1 && segment_length <= t.num_rows(), + "Invalid segment length.", + std::invalid_argument); + // flatten the hierarchy and determine some information about it. std::vector cols; std::vector info; @@ -484,17 +494,28 @@ std::unique_ptr row_bit_count(table_view const& t, CUDF_EXPECTS(info.size() == cols.size(), "Size/info mismatch"); // create output buffer and view - auto output = cudf::make_fixed_width_column( - data_type{type_id::INT32}, t.num_rows(), mask_state::UNALLOCATED, stream, mr); + auto const num_segments = cudf::util::div_rounding_up_safe(t.num_rows(), segment_length); + auto output = cudf::make_fixed_width_column( + data_type{type_id::INT32}, num_segments, mask_state::UNALLOCATED, stream, mr); mutable_column_view mcv = output->mutable_view(); // simple case. if we have no complex types (lists, strings, etc), the per-row size is already // trivially computed if (h_info.complex_type_count <= 0) { - thrust::fill(rmm::exec_policy(stream), - mcv.begin(), - mcv.end(), - h_info.simple_per_row_size); + thrust::tabulate( + rmm::exec_policy_nosync(stream), + mcv.begin(), + mcv.end(), + cuda::proclaim_return_type( + [segment_length, + num_rows = t.num_rows(), + per_row_size = h_info.simple_per_row_size] __device__(size_type const segment_idx) { + // Since the number of rows may not divisible by segment_length, + // the last segment may be shorter than the others. + auto const current_length = + cuda::std::min(segment_length, num_rows - segment_length * segment_idx); + return per_row_size * current_length; + })); return output; } @@ -523,22 +544,34 @@ std::unique_ptr row_bit_count(table_view const& t, // should we be aborting if we reach some extremely small block size, or just if we hit 0? CUDF_EXPECTS(block_size > 0, "Encountered a column hierarchy too complex for row_bit_count"); - cudf::detail::grid_1d grid{t.num_rows(), block_size, 1}; - compute_row_sizes<<>>( + cudf::detail::grid_1d grid{num_segments, block_size, 1}; + compute_segment_sizes<<>>( {std::get<1>(d_cols), cols.size()}, {d_info.data(), info.size()}, - {mcv.data(), static_cast(t.num_rows())}, + {mcv.data(), static_cast(mcv.size())}, + segment_length, h_info.max_branch_depth); return output; } +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return segmented_row_bit_count(t, 1, stream, mr); +} + } // namespace detail -/** - * @copydoc cudf::row_bit_count - * - */ +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_row_bit_count(t, segment_length, cudf::get_default_stream(), mr); +} + std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3e377b07eee..93443b04bd5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -259,6 +259,7 @@ ConfigureTest( transform/mask_to_bools_test.cpp transform/bools_to_mask_test.cpp transform/row_bit_count_test.cu + transform/segmented_row_bit_count_test.cu transform/one_hot_encode_tests.cpp ) diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 236407e62f3..01a042130d6 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,148 @@ #include +namespace row_bit_count_test { + +template +std::pair, std::unique_ptr> build_list_column() +{ + using LCW = cudf::test::lists_column_wrapper; + constexpr cudf::size_type type_size = sizeof(cudf::device_storage_type_t) * CHAR_BIT; + + // { + // {{1, 2}, {3, 4, 5}}, + // {{}}, + // {LCW{10}}, + // {{6, 7, 8}, {9}}, + // {{-1, -2}, {-3, -4}}, + // {{-5, -6, -7}, {-8, -9}} + // } + cudf::test::fixed_width_column_wrapper values{ + 1, 2, 3, 4, 5, 10, 6, 7, 8, 9, -1, -2, -3, -4, -5, -6, -7, -8, -9}; + cudf::test::fixed_width_column_wrapper inner_offsets{ + 0, 2, 5, 6, 9, 10, 12, 14, 17, 19}; + auto inner_list = cudf::make_lists_column(9, inner_offsets.release(), values.release(), 0, {}); + cudf::test::fixed_width_column_wrapper outer_offsets{0, 2, 2, 3, 5, 7, 9}; + auto list = cudf::make_lists_column(6, outer_offsets.release(), std::move(inner_list), 0, {}); + + // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf + cudf::test::fixed_width_column_wrapper expected{ + ((4 + 8) * CHAR_BIT) + (type_size * 5), + ((4 + 0) * CHAR_BIT) + (type_size * 0), + ((4 + 4) * CHAR_BIT) + (type_size * 1), + ((4 + 8) * CHAR_BIT) + (type_size * 4), + ((4 + 8) * CHAR_BIT) + (type_size * 4), + ((4 + 8) * CHAR_BIT) + (type_size * 5)}; + + return {std::move(list), expected.release()}; +} + +std::pair, std::unique_ptr> build_struct_column() +{ + std::vector struct_validity{0, 1, 1, 1, 1, 0}; + std::vector strings{"abc", "def", "", "z", "bananas", "daïs"}; + + cudf::test::fixed_width_column_wrapper col0{0, 1, 2, 3, 4, 5}; + cudf::test::fixed_width_column_wrapper col1{{8, 9, 10, 11, 12, 13}, {1, 0, 1, 1, 1, 1}}; + cudf::test::strings_column_wrapper col2(strings.begin(), strings.end()); + + // creating a struct column will cause all child columns to be promoted to have validity + cudf::test::structs_column_wrapper struct_col({col0, col1, col2}, struct_validity); + + // expect (1 offset (4 bytes) + (length of string if row is valid) + 1 validity bit) + + // (1 float + 1 validity bit) + + // (1 int16_t + 1 validity bit) + + // (1 validity bit) + cudf::test::fixed_width_column_wrapper expected_sizes{84, 108, 84, 92, 140, 84}; + + return {struct_col.release(), expected_sizes.release()}; +} + +std::unique_ptr build_nested_column1(std::vector const& struct_validity) +{ + // tests the "branching" case -> list ...>>> + + // List, float, int16> + + // Inner list column + cudf::test::lists_column_wrapper list{{1, 2, 3, 4, 5}, + {6, 7, 8}, + {33, 34, 35, 36, 37, 38, 39}, + {-1, -2}, + {-10, -11, -1, -20}, + {40, 41, 42}, + {100, 200, 300}, + {-100, -200, -300}}; + + // floats + std::vector ages{5, 10, 15, 20, 4, 75, 16, -16}; + std::vector ages_validity = {1, 1, 1, 1, 0, 1, 0, 1}; + auto ages_column = + cudf::test::fixed_width_column_wrapper(ages.begin(), ages.end(), ages_validity.begin()); + + // int16 values + std::vector vals{-1, -2, -3, 1, 2, 3, 8, 9}; + auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); + + // Assemble struct column + auto struct_column = + cudf::test::structs_column_wrapper({list, ages_column, i16_column}, struct_validity); + + // wrap in a list + std::vector outer_offsets{0, 1, 1, 3, 6, 7, 8}; + cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), + outer_offsets.end()); + auto const size = static_cast(outer_offsets_col).size() - 1; + + // Each struct (list child) has size: + // (1 offset (4 bytes) + (list size if row is valid) + 1 validity bit) + + // (1 float + 1 validity bit) + + // (1 int16_t + 1 validity bit) + + // (1 validity bit) + // Each top level list has size: + // 1 offset (4 bytes) + (list size if row is valid). + + return cudf::make_lists_column(static_cast(size), + outer_offsets_col.release(), + struct_column.release(), + 0, + rmm::device_buffer{}); +} + +std::unique_ptr build_nested_column2(std::vector const& struct_validity) +{ + // List>, Struct>> + + // Inner list column + // clang-format off + cudf::test::lists_column_wrapper list{ + {{1, 2, 3, 4, 5}, {2, 3}}, + {{6, 7, 8}, {8, 9}}, + {{1, 2}, {3, 4, 5}, {33, 34, 35, 36, 37, 38, 39}}}; + // clang-format on + + // Inner struct + std::vector vals{-1, -2, -3}; + auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); + auto inner_struct = cudf::test::structs_column_wrapper({i16_column}); + + // outer struct + auto outer_struct = cudf::test::structs_column_wrapper({list, inner_struct}, struct_validity); + + // wrap in a list + std::vector outer_offsets{0, 1, 1, 3}; + cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), + outer_offsets.end()); + auto const size = static_cast(outer_offsets_col).size() - 1; + return cudf::make_lists_column(static_cast(size), + outer_offsets_col.release(), + outer_struct.release(), + 0, + rmm::device_buffer{}); +} + +} // namespace row_bit_count_test + template struct RowBitCountTyped : public cudf::test::BaseFixture {}; @@ -82,45 +224,11 @@ TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); } -template -std::pair, std::unique_ptr> build_list_column() -{ - using LCW = cudf::test::lists_column_wrapper; - constexpr cudf::size_type type_size = sizeof(cudf::device_storage_type_t) * CHAR_BIT; - - // { - // {{1, 2}, {3, 4, 5}}, - // {{}}, - // {LCW{10}}, - // {{6, 7, 8}, {9}}, - // {{-1, -2}, {-3, -4}}, - // {{-5, -6, -7}, {-8, -9}} - // } - cudf::test::fixed_width_column_wrapper values{ - 1, 2, 3, 4, 5, 10, 6, 7, 8, 9, -1, -2, -3, -4, -5, -6, -7, -8, -9}; - cudf::test::fixed_width_column_wrapper inner_offsets{ - 0, 2, 5, 6, 9, 10, 12, 14, 17, 19}; - auto inner_list = cudf::make_lists_column(9, inner_offsets.release(), values.release(), 0, {}); - cudf::test::fixed_width_column_wrapper outer_offsets{0, 2, 2, 3, 5, 7, 9}; - auto list = cudf::make_lists_column(6, outer_offsets.release(), std::move(inner_list), 0, {}); - - // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf - cudf::test::fixed_width_column_wrapper expected{ - ((4 + 8) * CHAR_BIT) + (type_size * 5), - ((4 + 0) * CHAR_BIT) + (type_size * 0), - ((4 + 4) * CHAR_BIT) + (type_size * 1), - ((4 + 8) * CHAR_BIT) + (type_size * 4), - ((4 + 8) * CHAR_BIT) + (type_size * 4), - ((4 + 8) * CHAR_BIT) + (type_size * 5)}; - - return {std::move(list), expected.release()}; -} - TYPED_TEST(RowBitCountTyped, Lists) { using T = TypeParam; - auto [col, expected_sizes] = build_list_column(); + auto [col, expected_sizes] = row_bit_count_test::build_list_column(); cudf::table_view t({*col}); auto result = cudf::row_bit_count(t); @@ -272,27 +380,6 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(row_bit_counts->view(), expected_row_bit_counts->view()); } -std::pair, std::unique_ptr> build_struct_column() -{ - std::vector struct_validity{0, 1, 1, 1, 1, 0}; - std::vector strings{"abc", "def", "", "z", "bananas", "daïs"}; - - cudf::test::fixed_width_column_wrapper col0{0, 1, 2, 3, 4, 5}; - cudf::test::fixed_width_column_wrapper col1{{8, 9, 10, 11, 12, 13}, {1, 0, 1, 1, 1, 1}}; - cudf::test::strings_column_wrapper col2(strings.begin(), strings.end()); - - // creating a struct column will cause all child columns to be promoted to have validity - cudf::test::structs_column_wrapper struct_col({col0, col1, col2}, struct_validity); - - // expect (1 offset (4 bytes) + (length of string if row is valid) + 1 validity bit) + - // (1 float + 1 validity bit) + - // (1 int16_t + 1 validity bit) + - // (1 validity bit) - cudf::test::fixed_width_column_wrapper expected_sizes{84, 108, 84, 92, 140, 84}; - - return {struct_col.release(), expected_sizes.release()}; -} - TEST_F(RowBitCount, StructsNoNulls) { std::vector strings{"abc", "daïs", "", "z", "bananas", "warp"}; @@ -319,7 +406,7 @@ TEST_F(RowBitCount, StructsNoNulls) TEST_F(RowBitCount, StructsNulls) { - auto [struct_col, expected_sizes] = build_struct_column(); + auto [struct_col, expected_sizes] = row_bit_count_test::build_struct_column(); cudf::table_view t({*struct_col}); auto result = cudf::row_bit_count(t); @@ -346,101 +433,18 @@ TEST_F(RowBitCount, StructsNested) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } -std::unique_ptr build_nested_column1(std::vector const& struct_validity) -{ - // tests the "branching" case -> list ...>>> - - // List, float, int16> - - // Inner list column - cudf::test::lists_column_wrapper list{{1, 2, 3, 4, 5}, - {6, 7, 8}, - {33, 34, 35, 36, 37, 38, 39}, - {-1, -2}, - {-10, -11, -1, -20}, - {40, 41, 42}, - {100, 200, 300}, - {-100, -200, -300}}; - - // floats - std::vector ages{5, 10, 15, 20, 4, 75, 16, -16}; - std::vector ages_validity = {1, 1, 1, 1, 0, 1, 0, 1}; - auto ages_column = - cudf::test::fixed_width_column_wrapper(ages.begin(), ages.end(), ages_validity.begin()); - - // int16 values - std::vector vals{-1, -2, -3, 1, 2, 3, 8, 9}; - auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); - - // Assemble struct column - auto struct_column = - cudf::test::structs_column_wrapper({list, ages_column, i16_column}, struct_validity); - - // wrap in a list - std::vector outer_offsets{0, 1, 1, 3, 6, 7, 8}; - cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), - outer_offsets.end()); - auto const size = static_cast(outer_offsets_col).size() - 1; - - // Each struct (list child) has size: - // (1 offset (4 bytes) + (list size if row is valid) + 1 validity bit) + - // (1 float + 1 validity bit) + - // (1 int16_t + 1 validity bit) + - // (1 validity bit) - // Each top level list has size: - // 1 offset (4 bytes) + (list size if row is valid). - - return cudf::make_lists_column(static_cast(size), - outer_offsets_col.release(), - struct_column.release(), - 0, - rmm::device_buffer{}); -} - -std::unique_ptr build_nested_column2(std::vector const& struct_validity) -{ - // List>, Struct>> - - // Inner list column - // clang-format off - cudf::test::lists_column_wrapper list{ - {{1, 2, 3, 4, 5}, {2, 3}}, - {{6, 7, 8}, {8, 9}}, - {{1, 2}, {3, 4, 5}, {33, 34, 35, 36, 37, 38, 39}}}; - // clang-format on - - // Inner struct - std::vector vals{-1, -2, -3}; - auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); - auto inner_struct = cudf::test::structs_column_wrapper({i16_column}); - - // outer struct - auto outer_struct = cudf::test::structs_column_wrapper({list, inner_struct}, struct_validity); - - // wrap in a list - std::vector outer_offsets{0, 1, 1, 3}; - cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), - outer_offsets.end()); - auto const size = static_cast(outer_offsets_col).size() - 1; - return make_lists_column(static_cast(size), - outer_offsets_col.release(), - outer_struct.release(), - 0, - rmm::device_buffer{}); -} - TEST_F(RowBitCount, NestedTypes) { // List, float, List, int16> { - auto const col_no_nulls = build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); + auto const col_no_nulls = row_bit_count_test::build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); auto const expected_sizes_no_nulls = cudf::test::fixed_width_column_wrapper{276, 32, 520, 572, 212, 212} .release(); cudf::table_view no_nulls_t({*col_no_nulls}); auto no_nulls_result = cudf::row_bit_count(no_nulls_t); - auto const col_nulls = build_nested_column1({0, 0, 1, 1, 1, 1, 1, 1}); + auto const col_nulls = row_bit_count_test::build_nested_column1({0, 0, 1, 1, 1, 1, 1, 1}); auto const expected_sizes_with_nulls = cudf::test::fixed_width_column_wrapper{116, 32, 424, 572, 212, 212} .release(); @@ -469,11 +473,11 @@ TEST_F(RowBitCount, NestedTypes) // List>, Struct>> { - auto col_no_nulls = build_nested_column2({1, 1, 1}); + auto col_no_nulls = row_bit_count_test::build_nested_column2({1, 1, 1}); cudf::table_view no_nulls_t({*col_no_nulls}); auto no_nulls_result = cudf::row_bit_count(no_nulls_t); - auto col_nulls = build_nested_column2({1, 0, 1}); + auto col_nulls = row_bit_count_test::build_nested_column2({1, 0, 1}); cudf::table_view nulls_t({*col_nulls}); auto nulls_result = cudf::row_bit_count(nulls_t); @@ -597,15 +601,15 @@ struct sum_functor { TEST_F(RowBitCount, Table) { // complex nested column - auto col0 = build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); + auto col0 = row_bit_count_test::build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); auto col0_sizes = cudf::test::fixed_width_column_wrapper{276, 32, 520, 572, 212, 212}.release(); // struct column - auto [col1, col1_sizes] = build_struct_column(); + auto [col1, col1_sizes] = row_bit_count_test::build_struct_column(); // list column - auto [col2, col2_sizes] = build_list_column(); + auto [col2, col2_sizes] = row_bit_count_test::build_list_column(); cudf::table_view t({*col0, *col1, *col2}); auto result = cudf::row_bit_count(t); diff --git a/cpp/tests/transform/segmented_row_bit_count_test.cu b/cpp/tests/transform/segmented_row_bit_count_test.cu new file mode 100644 index 00000000000..652b9053582 --- /dev/null +++ b/cpp/tests/transform/segmented_row_bit_count_test.cu @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +// Reuse function defined in `row_bit_count_test.cu`. +namespace row_bit_count_test { +template +std::pair, std::unique_ptr> build_list_column(); +std::pair, std::unique_ptr> build_struct_column(); +std::unique_ptr build_nested_column1(std::vector const& struct_validity); +std::unique_ptr build_nested_column2(std::vector const& struct_validity); +} // namespace row_bit_count_test + +namespace { + +// Compute row bit count, then sum up sizes for each segment of rows. +std::pair, std::unique_ptr> +compute_segmented_row_bit_count(cudf::table_view const& input, cudf::size_type segment_length) +{ + // The expected values are computed with the assumption that + // the outputs of `cudf::row_bit_count` are correct. + // This should be fine as they are verified by their own unit tests in `row_bit_count_test.cu`. + auto const row_sizes = cudf::row_bit_count(input); + auto const num_segments = cudf::util::div_rounding_up_safe(row_sizes->size(), segment_length); + auto expected = + cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT32}, num_segments); + + thrust::transform( + rmm::exec_policy(cudf::get_default_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + expected->mutable_view().begin(), + cuda::proclaim_return_type( + [segment_length, + num_segments, + num_rows = row_sizes->size(), + d_sizes = row_sizes->view().begin()] __device__(auto const segment_idx) { + // Since the number of rows may not divisible by segment_length, + // the last segment may be shorter than the others. + auto const size_begin = d_sizes + segment_idx * segment_length; + auto const size_end = std::min(size_begin + segment_length, d_sizes + num_rows); + return thrust::reduce(thrust::seq, size_begin, size_end); + })); + + auto actual = cudf::segmented_row_bit_count(input, segment_length); + return {std::move(expected), std::move(actual)}; +} + +} // namespace + +struct SegmentedRowBitCount : public cudf::test::BaseFixture {}; + +TEST_F(SegmentedRowBitCount, Lists) +{ + auto const col = std::get<0>(row_bit_count_test::build_list_column()); + auto const input = cudf::table_view({*col}); + + auto constexpr segment_length = 3; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); +} + +TEST_F(SegmentedRowBitCount, StringsWithNulls) +{ + // clang-format off + std::vector const strings { "daïs", "def", "", "z", "bananas", "warp", "", "zing" }; + std::vector const valids { 1, 0, 0, 1, 0, 1, 1, 1 }; + // clang-format on + cudf::test::strings_column_wrapper const col(strings.begin(), strings.end(), valids.begin()); + auto const input = cudf::table_view({col}); + + auto constexpr segment_length = 2; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); +} + +TEST_F(SegmentedRowBitCount, StructsWithNulls) +{ + auto const col = std::get<0>(row_bit_count_test::build_struct_column()); + auto const input = cudf::table_view({*col}); + + auto constexpr segment_length = 2; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); +} + +TEST_F(SegmentedRowBitCount, NestedTypes) +{ + auto constexpr segment_length = 2; + + { + // List, float, List, int16> + auto const col = row_bit_count_test::build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); + auto const input = cudf::table_view({*col}); + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + { + // List, float, List, int16> + auto const col = row_bit_count_test::build_nested_column1({0, 0, 1, 1, 1, 1, 1, 1}); + auto const input = cudf::table_view({*col}); + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + + { + // List>, Struct>> + auto const col = row_bit_count_test::build_nested_column2({1, 1, 1}); + auto const input = cudf::table_view({*col}); + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + { + // List>, Struct>> + auto const col = row_bit_count_test::build_nested_column2({1, 0, 1}); + auto const input = cudf::table_view({*col}); + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } +} + +TEST_F(SegmentedRowBitCount, NestedTypesTable) +{ + auto const col0 = row_bit_count_test::build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); + auto const col1 = std::get<0>(row_bit_count_test::build_struct_column()); + auto const col2 = std::get<0>(row_bit_count_test::build_list_column()); + auto const input = cudf::table_view({*col0, *col1, *col2}); + + { + auto const segment_length = 2; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + + { + auto const segment_length = 4; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + + { + auto const segment_length = 5; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } +} + +TEST_F(SegmentedRowBitCount, EmptyInput) +{ + { + auto const input = cudf::table_view{}; + { + auto const result = cudf::segmented_row_bit_count(input, 0); + EXPECT_TRUE(result != nullptr && result->size() == 0); + } + { + auto const result = cudf::segmented_row_bit_count(input, 1000); + EXPECT_TRUE(result != nullptr && result->size() == 0); + } + } + + { + auto const strings = cudf::make_empty_column(cudf::type_id::STRING); + auto const ints = cudf::make_empty_column(cudf::type_id::INT32); + auto const input = cudf::table_view{{*strings, *ints}}; + { + auto const result = cudf::segmented_row_bit_count(input, 0); + EXPECT_TRUE(result != nullptr && result->size() == 0); + } + { + auto const result = cudf::segmented_row_bit_count(input, 1000); + EXPECT_TRUE(result != nullptr && result->size() == 0); + } + } +} + +TEST_F(SegmentedRowBitCount, InvalidSegment) +{ + auto const col = cudf::make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, 16); + auto const input = cudf::table_view({*col}); + + EXPECT_NO_THROW(cudf::segmented_row_bit_count(input, 1)); + EXPECT_NO_THROW(cudf::segmented_row_bit_count(input, input.num_rows())); + EXPECT_THROW(cudf::segmented_row_bit_count(input, -1), std::invalid_argument); + EXPECT_THROW(cudf::segmented_row_bit_count(input, 0), std::invalid_argument); + EXPECT_THROW(cudf::segmented_row_bit_count(input, input.num_rows() + 1), std::invalid_argument); + EXPECT_THROW(cudf::segmented_row_bit_count(input, 1000), std::invalid_argument); +} + +TEST_F(SegmentedRowBitCount, EdgeCases) +{ + auto const col0 = row_bit_count_test::build_nested_column1({1, 1, 1, 1, 1, 1, 1, 1}); + auto const col1 = std::get<0>(row_bit_count_test::build_struct_column()); + auto const col2 = std::get<0>(row_bit_count_test::build_list_column()); + auto const input = cudf::table_view({*col0, *col1, *col2}); + + { + auto const segment_length = 1; + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + + { + EXPECT_EQ(input.num_rows(), 6); + auto const segment_length = 4; // input.num_rows()==6, not divisible by segment_length . + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } + + { + auto const segment_length = input.num_rows(); + auto const [expected, actual] = compute_segmented_row_bit_count(input, segment_length); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *actual); + } +}