From 5cea6b5c9b60e71296b493d2366b3d507fdf53bf Mon Sep 17 00:00:00 2001 From: MithunR Date: Thu, 30 Sep 2021 10:19:35 -0700 Subject: [PATCH] Optionally nullify out-of-bounds indices in segmented_gather(). (#9318) The behaviour of `cudf::lists::segmented_gather()` is currently undefined for any index value `i` that falls outside the range `[-n, n)`, where `n` is the number of elements in the list row. This commit adds support to explicitly specify an `out_of_bounds_policy`, like in `cudf::gather()`. The erstwhile behaviour is retained when the bounds policy is set to `DONT_CHECK`. If the bounds policy is specified as `NULLIFY`, then for any index falling outside the range `[-n, n)`, the list element is set to `null`. E.g. ```c++ auto source_column = [{"a", "b", "c", "d"}, {"1", "2", "3", "4"}, {"x", "y", "z"}]; auto gather_map = [{0, -1, 4, -5}, {1, 3, 5}, {}]; auto result = segmented_gather(source_column, gather_map, NULLIFY); result == [{"a", "d", null, null}, {"2", "4", null}, {}]; ``` Authors: - MithunR (https://github.com/mythrocks) Approvers: - Conor Hoekstra (https://github.com/codereport) - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/9318 --- cpp/include/cudf/lists/detail/gather.cuh | 2 + cpp/include/cudf/lists/gather.hpp | 19 +- cpp/src/lists/copying/segmented_gather.cu | 43 +- .../copying/segmented_gather_list_tests.cpp | 460 +++++++++++------- 4 files changed, 331 insertions(+), 193 deletions(-) diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index d62b54208d5..7c2979c56cd 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -283,6 +283,7 @@ std::unique_ptr gather_list_leaf( /** * @copydoc cudf::lists::segmented_gather(lists_column_view const& source_column, * lists_column_view const& gather_map_list, + * out_of_bounds_policy bounds_policy, * 
rmm::mr::device_memory_resource* mr) * * @param stream CUDA stream on which to execute kernels @@ -290,6 +291,7 @@ std::unique_ptr gather_list_leaf( std::unique_ptr segmented_gather( lists_column_view const& source_column, lists_column_view const& gather_map_list, + out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index 66d7fb137b2..23054b91592 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -17,6 +17,7 @@ #include #include +#include #include namespace cudf { @@ -32,7 +33,7 @@ namespace lists { * * `source_column` with any depth and `gather_map_list` with depth 1 are only supported. * - * * @code{.pseudo} + * @code{.pseudo} * source_column : [{"a", "b", "c", "d"}, {"1", "2", "3", "4"}, {"x", "y", "z"}] * gather_map_list : [{0, 1, 3, 2}, {1, 3, 2}, {}] * @@ -44,11 +45,24 @@ namespace lists { * @throws cudf::logic_error if gather_map is not list column of an index type. * * If indices in `gather_map_list` are outside the range `[-n, n)`, where `n` is the number of - * elements in corresponding row of the source column, the behavior is undefined. + * elements in corresponding row of the source column, the behaviour is as follows: + * 1. If `bounds_policy` is set to `DONT_CHECK`, the behaviour is undefined. + * 2. If `bounds_policy` is set to `NULLIFY`, the corresponding element in the list row + * is set to null in the output column. 
+ * + * @code{.pseudo} + * source_column : [{"a", "b", "c", "d"}, {"1", "2", "3", "4"}, {"x", "y", "z"}] + * gather_map_list : [{0, -1, 4, -5}, {1, 3, 5}, {}] + * + * result_with_nullify : [{"a", "d", null, null}, {"2", "4", null}, {}] + * @endcode * * @param source_column View into the list column to gather from * @param gather_map_list View into a non-nullable list column of integral indices that maps the * element in list of each row in the source columns to rows of lists in the destination columns. + * @param bounds_policy Can be `DONT_CHECK` or `NULLIFY`. Selects whether or not to nullify the + * output list row's element, when the gather index falls outside the range `[-n, n)`, + * where `n` is the number of elements in list row corresponding to the gather-map row. * @param mr Device memory resource to allocate any returned objects * @return column with elements in list of rows gathered based on `gather_map_list` * @@ -56,6 +70,7 @@ namespace lists { std::unique_ptr segmented_gather( lists_column_view const& source_column, lists_column_view const& gather_map_list, + out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index da20cabdd8f..77d41c5ddc9 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -21,6 +21,7 @@ #include #include + #include namespace cudf { @@ -29,6 +30,7 @@ namespace detail { std::unique_ptr segmented_gather(lists_column_view const& value_column, lists_column_view const& gather_map, + out_of_bounds_policy bounds_policy, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -38,27 +40,38 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, CUDF_EXPECTS(value_column.size() == gather_map.size(), "Gather map and list column should be 
same size"); - auto gather_map_sliced_child = gather_map.get_sliced_child(stream); - auto const gather_map_size = gather_map_sliced_child.size(); - auto gather_index_begin = gather_map.offsets().begin() + 1 + gather_map.offset(); - auto gather_index_end = gather_index_begin + gather_map.size(); - auto value_offsets = value_column.offsets().begin() + value_column.offset(); - auto map_begin = cudf::detail::indexalator_factory::make_input_iterator(gather_map_sliced_child); + auto const gather_map_sliced_child = gather_map.get_sliced_child(stream); + auto const gather_map_size = gather_map_sliced_child.size(); + auto const gather_index_begin = gather_map.offsets_begin() + 1; + auto const gather_index_end = gather_map.offsets_end(); + auto const value_offsets = value_column.offsets_begin(); + auto const map_begin = + cudf::detail::indexalator_factory::make_input_iterator(gather_map_sliced_child); + auto const out_of_bounds = [] __device__(auto const index, auto const list_size) { + return index >= list_size || (index < 0 && -index > list_size); + }; // Calculate Flattened gather indices (value_offset[row]+sub_index - auto transformer = [value_offsets, map_begin, gather_index_begin, gather_index_end] __device__( - size_type index) -> size_type { + auto transformer = [value_offsets, + map_begin, + gather_index_begin, + gather_index_end, + bounds_policy, + out_of_bounds] __device__(size_type index) -> size_type { // Get each row's offset. (Each row is a list). auto offset_idx = thrust::upper_bound( thrust::seq, gather_index_begin, gather_index_end, gather_index_begin[-1] + index) - gather_index_begin; // Get each sub_index in list in each row of gather_map. 
- auto sub_index = map_begin[index]; - auto list_size = value_offsets[offset_idx + 1] - value_offsets[offset_idx]; - auto wrapped_sub_index = (sub_index % list_size + list_size) % list_size; + auto sub_index = map_begin[index]; + auto list_size = value_offsets[offset_idx + 1] - value_offsets[offset_idx]; + auto wrapped_sub_index = sub_index < 0 ? sub_index + list_size : sub_index; + auto constexpr null_idx = cuda::std::numeric_limits::max(); // Add sub_index to value_column offsets, to get gather indices of child of value_column - return value_offsets[offset_idx] + wrapped_sub_index - value_offsets[0]; + return (bounds_policy == out_of_bounds_policy::NULLIFY && out_of_bounds(sub_index, list_size)) + ? null_idx + : value_offsets[offset_idx] + wrapped_sub_index - value_offsets[0]; }; auto child_gather_index_begin = cudf::detail::make_counting_transform_iterator(0, transformer); @@ -66,7 +79,7 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, auto child_table = cudf::detail::gather(table_view({value_column.get_sliced_child(stream)}), child_gather_index_begin, child_gather_index_begin + gather_map_size, - out_of_bounds_policy::DONT_CHECK, + bounds_policy, stream, mr); auto child = std::move(child_table->release().front()); @@ -94,9 +107,11 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, std::unique_ptr segmented_gather(lists_column_view const& source_column, lists_column_view const& gather_map_list, + out_of_bounds_policy bounds_policy, rmm::mr::device_memory_resource* mr) { - return detail::segmented_gather(source_column, gather_map_list, rmm::cuda_stream_default, mr); + return detail::segmented_gather( + source_column, gather_map_list, bounds_policy, rmm::cuda_stream_default, mr); } } // namespace lists diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp index b02d0ad387d..528986e2a8d 100644 --- a/cpp/tests/copying/segmented_gather_list_tests.cpp +++ 
b/cpp/tests/copying/segmented_gather_list_tests.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include template @@ -31,7 +32,7 @@ using FixedWidthTypesNotBool = cudf::test::Concat; -TYPED_TEST_CASE(SegmentedGatherTest, FixedWidthTypesNotBool); +TYPED_TEST_SUITE(SegmentedGatherTest, FixedWidthTypesNotBool); class SegmentedGatherTestList : public cudf::test::BaseFixture { }; @@ -42,6 +43,11 @@ class SegmentedGatherTestList : public cudf::test::BaseFixture { template using LCW = cudf::test::lists_column_wrapper; using cudf::lists_column_view; +using cudf::lists::detail::segmented_gather; +using cudf::test::iterators::no_nulls; +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; +auto constexpr NULLIFY = cudf::out_of_bounds_policy::NULLIFY; TYPED_TEST(SegmentedGatherTest, Gather) { @@ -49,13 +55,23 @@ TYPED_TEST(SegmentedGatherTest, Gather) // List LCW list{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}}; - LCW gather_map{{3, 2, 1, 0}, {0}, {0, 1}, {0, 2, 1}}; - LCW expected{{4, 3, 2, 1}, {5}, {6, 7}, {8, 10, 9}}; - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + { + // Straight-line case. + auto const gather_map = LCW{{3, 2, 1, 0}, {0}, {0, 1}, {0, 2, 1}}; + auto const expected = LCW{{4, 3, 2, 1}, {5}, {6, 7}, {8, 10, 9}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + { + // Nullify out-of-bounds values. 
+ auto const gather_map = LCW{{3, 2, 4, 0}, {0}, {0, -3}, {0, 2, 1}}; + auto const expected = LCW{{{4, 3, 2, 1}, null_at(2)}, {5}, {{6, 7}, null_at(1)}, {8, 10, 9}}; + auto const results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } } TYPED_TEST(SegmentedGatherTest, GatherNothing) @@ -65,41 +81,31 @@ TYPED_TEST(SegmentedGatherTest, GatherNothing) // List { - LCW list{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}}; - LCW gather_map{LCW{}, LCW{}, LCW{}, LCW{}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - LCW expected{LCW{}, LCW{}, LCW{}, LCW{}}; + auto const list = LCW{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}}; + auto const gather_map = LCW{LCW{}, LCW{}, LCW{}, LCW{}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{LCW{}, LCW{}, LCW{}, LCW{}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } // List> { - LCW list{{{1, 2, 3, 4}, {5}}, {{6, 7}}, {{}, {8, 9, 10}}}; - LCW gather_map{LCW{}, LCW{}, LCW{}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const list = LCW{{{1, 2, 3, 4}, {5}}, {{6, 7}}, {{}, {8, 9, 10}}}; + auto const gather_map = LCW{LCW{}, LCW{}, LCW{}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); // hack to get column of empty list of list - LCW expected_dummy{{{1, 2, 3, 4}, {5}}, LCW{}, LCW{}, LCW{}}; - auto expected = cudf::split(expected_dummy, {1})[1]; + auto const expected_dummy = LCW{{{1, 2, 3, 4}, {5}}, LCW{}, LCW{}, LCW{}}; + auto const expected = cudf::split(expected_dummy, {1})[1]; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } - // List>> { - LCW list{{{{1, 2, 3, 4}, {5}}}, {{{6, 7}, {8, 9, 10}}}}; - LCW gather_map{LCW{}, LCW{}}; - - auto results = - 
cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - LCW expected_dummy{{{{1, 2, 3, 4}}}, // hack to get column of empty list of list of list - LCW{}, - LCW{}}; - auto expected = cudf::split(expected_dummy, {1})[1]; + auto const list = LCW{{{{1, 2, 3, 4}, {5}}}, {{{6, 7}, {8, 9, 10}}}}; + auto const gather_map = LCW{LCW{}, LCW{}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + // hack to get column of empty list of list of list + auto const expected_dummy = LCW{{{{1, 2, 3, 4}}}, LCW{}, LCW{}}; + auto const expected = cudf::split(expected_dummy, {1})[1]; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); // the result should preserve the full List>> hierarchy @@ -120,18 +126,29 @@ TYPED_TEST(SegmentedGatherTest, GatherNulls) { using T = TypeParam; - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); // List - LCW list{{{1, 2, 3, 4}, valids}, {5}, {{6, 7}, valids}, {{8, 9, 10}, valids}}; - LCW gather_map{{0, 1}, LCW{}, {1}, {2, 1, 0}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const list = LCW{{{1, 2, 3, 4}, valids}, {5}, {{6, 7}, valids}, {{8, 9, 10}, valids}}; - LCW expected{{{1, 2}, valids}, LCW{}, {{7}, valids + 1}, {{10, 9, 8}, valids}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + { + // Test gathering on lists that contain nulls. 
+ auto const gather_map = LCW{{0, 1}, LCW{}, {1}, {2, 1, 0}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = + LCW{{{1, 2}, valids}, LCW{}, {{7}, valids + 1}, {{10, 9, 8}, valids}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } + { + // Test gathering on lists that contain nulls, with out-of-bounds indices. + auto const gather_map = LCW{{10, -10}, LCW{}, {1}, {2, -10, 0}}; + auto const results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + auto const expected = + LCW{{{0, 0}, nulls_at({0, 1})}, LCW{}, {{7}, valids + 1}, {{10, 0, 8}, null_at(1)}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } } TYPED_TEST(SegmentedGatherTest, GatherNested) @@ -140,39 +157,76 @@ TYPED_TEST(SegmentedGatherTest, GatherNested) // List> { - LCW list{{{2, 3}, {4, 5}}, - {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {-17, -18}}}; - LCW gather_map{{0, 2, -2}, {1}, {1, 0, -1, 5}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {-17, -18}}}; + auto const gather_map = LCW{{0, -2, -2}, {1}, {1, 0, -1, -5}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{{{2, 3}, {2, 3}, {2, 3}}, + {{9, 10, 11}}, + {{17, 18}, {15, 16}, {-17, -18}, {15, 16}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on + } - LCW expected{ - {{2, 3}, {2, 3}, {2, 3}}, {{9, 10, 11}}, {{17, 18}, {15, 16}, {-17, -18}, {15, 16}}}; + // List>, with out-of-bounds gather indices. 
+ { + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {-17, -18}}}; + auto const gather_map = LCW{{0, 2, -2}, {1}, {1, 0, -1, -6}}; + auto const results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + auto const expected = LCW{{{{2, 3}, LCW{}, {2, 3}}, null_at(1)}, + {{9, 10, 11}}, + {{{17, 18}, {15, 16}, {-17, -18}, LCW{}}, null_at(3)}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } // List>> { - LCW list{{{{2, 3}, {4, 5}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, - {{{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}, - {{LCW{0}}}, - {{{10}, {20, 30, 40, 50}, {60, 70, 80}}, - {{0, 1, 3}, {5}}, - {{11, 12, 13, 14, 15}, {16, 17}, {0}}}, - {{{10, 20}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}}; - LCW gather_map{{1}, LCW{}, {0}, {1}, {0, -1, 1}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + // clang-format off + auto const list = LCW{{{{2, 3}, {4, 5}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, + {{{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}, + {{LCW{0}}}, + {{{10}, {20, 30, 40, 50}, {60, 70, 80}}, + {{0, 1, 3}, {5}}, + {{11, 12, 13, 14, 15}, {16, 17}, {0}}}, + {{{10, 20}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}}; + auto const gather_map = LCW{{1}, LCW{}, {0}, {1}, {0, -1, 1}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{{{{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, + LCW{}, + {{LCW{0}}}, + {{{0, 1, 3}, {5}}}, + {{{10, 20}}, {{40, 50}, {60, 70, 80}}, {LCW{30}}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on + } - LCW expected{{{{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, - LCW{}, - {{LCW{0}}}, - {{{0, 1, 3}, {5}}}, - {{{10, 20}}, {{40, 50}, {60, 70, 80}}, {LCW{30}}}}; + // List>>, with out-of-bounds gather indices. 
+ { + auto const list = LCW{{{{2, 3}, {4, 5}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, + {{{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}, + {{LCW{0}}}, + {{{10}, {20, 30, 40, 50}, {60, 70, 80}}, + {{0, 1, 3}, {5}}, + {{11, 12, 13, 14, 15}, {16, 17}, {0}}}, + {{{10, 20}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}}; + auto const gather_map = LCW{{1}, LCW{}, {0}, {1}, {0, -1, 3, -4}}; + auto const results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + auto const expected = + LCW{{{{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, + LCW{}, + {{LCW{0}}}, + {{{0, 1, 3}, {5}}}, + {{{{10, 20}}, {{40, 50}, {60, 70, 80}}, LCW{}, LCW{}}, nulls_at({2, 3})}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } } @@ -182,19 +236,32 @@ TYPED_TEST(SegmentedGatherTest, GatherOutOfOrder) // List> { - LCW list{{{2, 3}, {4, 5}}, - {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; - LCW gather_map{{1, 0}, {1, 2, 0}, {5, 4, 3, 2, 1, 0}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - LCW expected{{{4, 5}, {2, 3}}, - {{9, 10, 11}, {12, 13, 14}, {6, 7, 8}}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}}; + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; + auto const gather_map = LCW{{1, 0}, {1, 2, 0}, {4, 3, 2, 1, 0}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{{{4, 5}, {2, 3}}, + {{9, 10, 11}, {12, 13, 14}, {6, 7, 8}}, + {{17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on + } + // List>, with out-of-bounds gather indices. 
+ { + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; + auto const gather_map = LCW{{1, 0}, {3, -1, -4}, {5, 4, 3, 2, 1, 0}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + auto const expected = LCW{{{4, 5}, {2, 3}}, + {{LCW{}, {12, 13, 14}, LCW{}}, nulls_at({0, 2})}, + {{LCW{}, {17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}, null_at(0)}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } } @@ -204,19 +271,32 @@ TYPED_TEST(SegmentedGatherTest, GatherNegatives) // List> { - LCW list{{{2, 3}, {4, 5}}, - {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; - LCW gather_map{{-1, 0}, {-2, -1, 0}, {-5, -4, -3, -2, -1, 0}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - LCW expected{{{4, 5}, {2, 3}}, - {{9, 10, 11}, {12, 13, 14}, {6, 7, 8}}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}}; - + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; + auto const gather_map = LCW{{-1, 0}, {-2, -1, 0}, {-5, -4, -3, -2, -1, 0}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{{{4, 5}, {2, 3}}, + {{9, 10, 11}, {12, 13, 14}, {6, 7, 8}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on + } + // List>, with out-of-bounds gather indices. 
+ { + // clang-format off + auto const list = LCW{{{2, 3}, {4, 5}}, + {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}}; + auto const gather_map = LCW{{-1, 0}, {-2, -1, -4}, {-6, -4, -3, -2, -1, 0}}; + auto const results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + auto const expected = LCW{{{4, 5}, {2, 3}}, + {{{9, 10, 11}, {12, 13, 14}, LCW{}}, null_at(2)}, + {{LCW{}, {17, 18}, {17, 18}, {17, 18}, {17, 18}, {15, 16}}, null_at(0)}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } } @@ -224,51 +304,43 @@ TYPED_TEST(SegmentedGatherTest, GatherNestedNulls) { using T = TypeParam; - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); // List> { - LCW list{{{{2, 3}, valids}, {4, 5}}, - {{{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, valids}, - {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}, - {{{{25, 26}, valids}, {27, 28}, {{29, 30}, valids}, {31, 32}, {33, 34}}, valids}}; - - LCW gather_map{{0, 1}, {0, 2}, LCW{}, {0, 1, 4}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - auto trues = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - LCW expected{{{{2, 3}, valids}, {4, 5}}, - {{{6, 7, 8}, {12, 13, 14}}, trues}, - LCW{}, - {{{{25, 26}, valids}, {27, 28}, {33, 34}}, valids}}; - + // clang-format off + auto const list = LCW{{{{2, 3}, valids}, {4, 5}}, + {{{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, valids}, + {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}, + {{{{25, 26}, valids}, {27, 28}, {{29, 30}, valids}, {31, 32}, {33, 34}}, valids}}; + auto const gather_map = LCW{{0, 1}, {0, 2}, LCW{}, {0, 1, 4}}; + auto const results = segmented_gather(lists_column_view{list}, 
lists_column_view{gather_map}); + auto const expected = LCW{{{{2, 3}, valids}, {4, 5}}, + {{{6, 7, 8}, {12, 13, 14}}, no_nulls()}, + LCW{}, + {{{{25, 26}, valids}, {27, 28}, {33, 34}}, valids}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } // List>>> { - LCW list{{{{{2, 3}, {4, 5}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, - {{{15, 16}, {{27, 28}, valids}, {{37, 38}, valids}, {47, 48}, {57, 58}}}, - {{LCW{0}}}, - {{{10}, {20, 30, 40, 50}, {60, 70, 80}}, - {{0, 1, 3}, {5}}, - {{11, 12, 13, 14, 15}, {16, 17}, {0}}}, - {{{{{10, 20}, valids}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}, valids}}}; - - LCW gather_map{{1, 2, 4}}; - - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - LCW expected{{{{{15, 16}, {{27, 28}, valids}, {{37, 38}, valids}, {47, 48}, {57, 58}}}, - {{LCW{0}}}, - {{{{{10, 20}, valids}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}, valids}}}; - + // clang-format off + auto const list = LCW{{{{{2, 3}, {4, 5}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}}, + {{{15, 16}, {{27, 28}, valids}, {{37, 38}, valids}, {47, 48}, {57, 58}}}, + {{LCW{0}}}, + {{{10}, {20, 30, 40, 50}, {60, 70, 80}}, + {{0, 1, 3}, {5}}, + {{11, 12, 13, 14, 15}, {16, 17}, {0}}}, + {{{{{10, 20}, valids}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}, valids}}}; + auto const gather_map = LCW{{1, 2, 4}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = LCW{{{{{15, 16}, {{27, 28}, valids}, {{37, 38}, valids}, {47, 48}, {57, 58}}}, + {{LCW{0}}}, + {{{{{10, 20}, valids}}, {LCW{30}}, {{40, 50}, {60, 70, 80}}}, valids}}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + // clang-format on } } @@ -276,15 +348,11 @@ TYPED_TEST(SegmentedGatherTest, GatherNestedWithEmpties) { using T = TypeParam; - LCW list{{{2, 3}, LCW{}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, {LCW{}}}; - LCW gather_map{LCW{0}, LCW{0}, LCW{0}}; - - auto results = - 
cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - - // skip one null, gather one null. - LCW expected{{{2, 3}}, {{6, 7, 8}}, {LCW{}}}; - + auto const list = LCW{{{2, 3}, LCW{}}, {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}}, {LCW{}}}; + auto const gather_map = LCW{LCW{0}, LCW{0}, LCW{0}}; + auto results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const expected = + LCW{{{2, 3}}, {{6, 7, 8}}, {LCW{}}}; // skip one null, gather one null. CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); } @@ -292,7 +360,7 @@ TYPED_TEST(SegmentedGatherTest, GatherSliced) { using T = TypeParam; { - LCW a{ + auto const a = LCW{ {{1, 1, 1}, {2, 2}, {3, 3}}, {{4, 4, 4}, {5, 5}, {6, 6}}, {{7, 7, 7}, {8, 8}, {9, 9}}, @@ -302,23 +370,27 @@ TYPED_TEST(SegmentedGatherTest, GatherSliced) {{50, 50, 50, 50}, {6, 13}}, {{70, 70, 70, 70}, {80}}, }; - auto split_a = cudf::split(a, {3}); - - auto result0 = cudf::lists::detail::segmented_gather( - lists_column_view{split_a[0]}, lists_column_view{LCW{{1, 2}, {0, 2}, {0, 1}}}); - LCW expected0{ - {{2, 2}, {3, 3}}, - {{4, 4, 4}, {6, 6}}, - {{7, 7, 7}, {8, 8}}, - }; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected0, result0->view()); - - auto result1 = cudf::lists::detail::segmented_gather( - lists_column_view{split_a[1]}, - lists_column_view{LCW{{0, 1}, LCW{}, LCW{}, {0, 1}, LCW{}}}); - LCW expected1{ - {{10, 10, 10}, {11, 11}}, LCW{}, LCW{}, {{50, 50, 50, 50}, {6, 13}}, LCW{}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); + auto const split_a = cudf::split(a, {3}); + + { + auto const gather_map = lists_column_view{LCW{{1, 2}, {0, 2}, {0, 1}}}; + auto const result = segmented_gather(lists_column_view{split_a[0]}, gather_map); + auto const expected = LCW{ + {{2, 2}, {3, 3}}, + {{4, 4, 4}, {6, 6}}, + {{7, 7, 7}, {8, 8}}, + }; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const gather_map = + lists_column_view{LCW{{0, 1}, LCW{}, 
LCW{}, {0, 1}, LCW{}}}; + auto const result = segmented_gather(lists_column_view{split_a[1]}, gather_map); + auto const expected = + LCW{{{10, 10, 10}, {11, 11}}, LCW{}, LCW{}, {{50, 50, 50, 50}, {6, 13}}, LCW{}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } } auto valids = @@ -404,13 +476,24 @@ TEST_F(SegmentedGatherTestString, StringGather) { using T = cudf::string_view; // List - LCW list{{"a", "b", "c", "d"}, {"1", "22", "333", "4"}, {"x", "y", "z"}}; - LCW gather_map{{0, 1, 3, 2}, {1, 0, 3, 2}, LCW{}}; - LCW expected{{"a", "b", "d", "c"}, {"22", "1", "4", "333"}, LCW{}}; + { + auto const list = LCW{{"a", "b", "c", "d"}, {"1", "22", "333", "4"}, {"x", "y", "z"}}; + auto const gather_map = LCW{{0, 1, 3, 2}, {1, 0, 3, 2}, LCW{}}; + auto const expected = LCW{{"a", "b", "d", "c"}, {"22", "1", "4", "333"}, LCW{}}; + auto const result = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } - auto result = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + // List, with out-of-bounds gather indices. 
+ { + auto const list = LCW{{"a", "b", "c", "d"}, {"1", "22", "333", "4"}, {"x", "y", "z"}}; + auto const gather_map = LCW{{0, 1, 3, 4}, {1, -5, 3, 2}, LCW{}}; + auto const expected = LCW{{{"a", "b", "d", "c"}, cudf::test::iterators::null_at(3)}, + {{"22", "1", "4", "333"}, cudf::test::iterators::null_at(1)}, + LCW{}}; + auto result = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } } using SegmentedGatherTestFloat = SegmentedGatherTest; @@ -419,28 +502,51 @@ TEST_F(SegmentedGatherTestFloat, GatherMapSliced) using T = float; // List - LCW list{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}, {11, 12}, {13, 14, 15, 16}}; - LCW gather_map{{3, 2, 1, 0}, {0}, {0, 1}, {0, 2, 1}, {0}, {1}}; - // gather_map.offset: 0, 4, 5, 7, 10, 11, 12 - LCW expected{{4, 3, 2, 1}, {5}, {6, 7}, {8, 10, 9}, {11}, {14}}; + { + auto const list = LCW{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}, {11, 12}, {13, 14, 15, 16}}; + auto const gather_map = LCW{{3, 2, 1, 0}, {0}, {0, 1}, {0, 2, 1}, {0}, {1}}; + // gather_map.offset: 0, 4, 5, 7, 10, 11, 12 + auto const expected = LCW{{4, 3, 2, 1}, {5}, {6, 7}, {8, 10, 9}, {11}, {14}}; + auto const results = segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); - auto results = - cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{gather_map}); + auto const sliced = cudf::split(list, {1, 4}); + auto const split_m = cudf::split(gather_map, {1, 4}); + auto const split_e = cudf::split(expected, {1, 4}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); - auto sliced = cudf::split(list, {1, 4}); - auto split_m = cudf::split(gather_map, {1, 4}); - auto split_e = cudf::split(expected, {1, 4}); - - auto result0 = cudf::lists::detail::segmented_gather(lists_column_view{sliced[0]}, - lists_column_view{split_m[0]}); - 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[0], result0->view()); - auto result1 = cudf::lists::detail::segmented_gather(lists_column_view{sliced[1]}, - lists_column_view{split_m[1]}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[1], result1->view()); - auto result2 = cudf::lists::detail::segmented_gather(lists_column_view{sliced[2]}, - lists_column_view{split_m[2]}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[2], result2->view()); + auto result0 = segmented_gather(lists_column_view{sliced[0]}, lists_column_view{split_m[0]}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[0], result0->view()); + auto result1 = segmented_gather(lists_column_view{sliced[1]}, lists_column_view{split_m[1]}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[1], result1->view()); + auto result2 = segmented_gather(lists_column_view{sliced[2]}, lists_column_view{split_m[2]}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[2], result2->view()); + } + + // List, with out-of-bounds gather indices. + { + auto const list = LCW{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 10}, {11, 12}, {13, 14, 15, 16}}; + auto const gather_map = LCW{{3, -5, 1, 0}, {0}, {0, 1}, {0, 2, 3}, {0}, {1}}; + // gather_map.offset: 0, 4, 5, 7, 10, 11, 12 + auto const expected = + LCW{{{4, 0, 2, 1}, null_at(1)}, {5}, {6, 7}, {{8, 10, 9}, null_at(2)}, {11}, {14}}; + auto results = + segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + auto const sliced = cudf::split(list, {1, 4}); + auto const split_m = cudf::split(gather_map, {1, 4}); + auto const split_e = cudf::split(expected, {1, 4}); + + auto const result0 = + segmented_gather(lists_column_view{sliced[0]}, lists_column_view{split_m[0]}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[0], result0->view()); + auto const result1 = + segmented_gather(lists_column_view{sliced[1]}, lists_column_view{split_m[1]}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[1], 
result1->view()); + auto const result2 = + segmented_gather(lists_column_view{sliced[2]}, lists_column_view{split_m[2]}, NULLIFY); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(split_e[2], result2->view()); + } } TEST_F(SegmentedGatherTestFloat, Fails)