From bb9b1becf0fff5d6ead13e4b271b61fac65e18c0 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Mon, 19 Jul 2021 17:50:51 -0500 Subject: [PATCH] Handle sliced structs properly in pack/contiguous_split. (#8739) Pre-sliced struct columns (those with a positive offset) were not being handled correctly. Dependent PR has been merged. Authors: - https://github.com/nvdbaranec Approvers: - Nghia Truong (https://github.com/ttnghia) - Robert Maynard (https://github.com/robertmaynard) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/8739 --- cpp/src/copying/contiguous_split.cu | 44 ++++++-- cpp/tests/copying/pack_tests.cpp | 97 ++++++++++++----- cpp/tests/copying/split_tests.cpp | 162 ++++++++++++++++++++++++++-- 3 files changed, 262 insertions(+), 41 deletions(-) diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index d4d54a3f94f..4a2c7b651e7 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -595,8 +596,15 @@ std::pair buf_info_functor::operator() sliced_children; + sliced_children.reserve(scv.num_children()); + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(scv.num_children()), + std::back_inserter(sliced_children), + [&scv](size_type child_index) { return scv.get_sliced_child(child_index); }); + return setup_source_buf_info(sliced_children.begin(), + sliced_children.end(), head, current, offset_stack_pos, @@ -783,17 +791,35 @@ std::vector contiguous_split(cudf::table_view const& input, // if inputs are empty, just return num_partitions empty tables if (input.column(0).size() == 0) { + // sanitize the inputs (to handle corner cases like sliced tables) + std::vector> empty_columns; + empty_columns.reserve(input.num_columns()); + std::transform( + input.begin(), input.end(), std::back_inserter(empty_columns), [](column_view const& col) { + return cudf::empty_like(col); + }); + std::vector empty_column_views; + empty_column_views.reserve(input.num_columns()); + std::transform(empty_columns.begin(), + empty_columns.end(), + std::back_inserter(empty_column_views), + [](std::unique_ptr const& col) { return col->view(); }); + table_view empty_inputs(empty_column_views); + // build the empty results std::vector result; result.reserve(num_partitions); auto iter = thrust::make_counting_iterator(0); - std::transform( - iter, iter + num_partitions, std::back_inserter(result), [&input](int partition_index) { - return packed_table{input, - packed_columns{std::make_unique(pack_metadata( - input, static_cast(nullptr), 0)), - std::make_unique()}}; - }); + std::transform(iter, + iter + num_partitions, + std::back_inserter(result), + [&empty_inputs](int partition_index) { + return packed_table{ + empty_inputs, + packed_columns{std::make_unique(pack_metadata( + empty_inputs, static_cast(nullptr), 0)), + std::make_unique()}}; + }); return result; } diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index 11aa505d163..e23911a3d76 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -444,32 +444,59 @@ TEST_F(PackUnpackTest, NestedEmpty) TEST_F(PackUnpackTest, NestedSliced) { - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + // list + { + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - using LCW = cudf::test::lists_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col0{{{{1, 2, 3}, valids}, {4, 5}}, - {{LCW{}, LCW{}, {7, 8}, LCW{}}, valids}, - {{6, 12}}, - {{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids}, - {{LCW{}, {-1, -2, -3, -4, -5}}, valids}, - {LCW{}}, - {{-10}, {-100, -200}}}; - - cudf::test::strings_column_wrapper col1{ - "Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"}; - cudf::test::fixed_width_column_wrapper col2{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - - std::vector> children; - children.push_back(std::make_unique(col2)); - children.push_back(std::make_unique(col0)); - children.push_back(std::make_unique(col1)); - auto col3 = cudf::make_structs_column( - static_cast(col0).size(), std::move(children), 0, rmm::device_buffer{}); - - cudf::table_view t({col0, col1, col2, *col3}); - this->run_test(t); + cudf::test::lists_column_wrapper col0{{{{1, 2, 3}, valids}, {4, 5}}, + {{LCW{}, LCW{}, {7, 8}, LCW{}}, valids}, + {{6, 12}}, + {{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids}, + {{LCW{}, {-1, -2, -3, -4, -5}}, valids}, + {LCW{}}, + {{-10}, {-100, -200}}}; + + cudf::test::strings_column_wrapper col1{ + "Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"}; + cudf::test::fixed_width_column_wrapper col2{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; + + std::vector> children; + children.push_back(std::make_unique(col2)); + children.push_back(std::make_unique(col0)); + children.push_back(std::make_unique(col1)); + auto col3 = cudf::make_structs_column( + static_cast(col0).size(), std::move(children), 0, rmm::device_buffer{}); + + cudf::table_view t({col0, col1, col2, *col3}); + this->run_test(t); + } + + // struct + { + cudf::test::fixed_width_column_wrapper a{0, 1, 2, 3, 4, 5, 6, 7}; + cudf::test::fixed_width_column_wrapper b{{0, -1, -2, -3, -4, -5, -6, -7}, + {1, 1, 1, 0, 0, 0, 0, 1}}; + cudf::test::strings_column_wrapper c{{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}, + {0, 0, 1, 1, 1, 1, 1, 1}}; + std::vector list_validity{1, 0, 1, 0, 1, 0, 1, 1}; + cudf::test::lists_column_wrapper d{ + {{0, 1}, {2, 3, 4}, {5, 6}, {7}, {8, 9, 10}, {11, 12}, {}, {15, 16, 17}}, + list_validity.begin()}; + cudf::test::fixed_width_column_wrapper _a{10, 20, 30, 40, 50, 60, 70, 80}; + cudf::test::fixed_width_column_wrapper _b{-10, -20, -30, -40, -50, -60, -70, -80}; + cudf::test::strings_column_wrapper _c{"aa", "", "ccc", "dddd", "eeeee", "f", "gg", "hhh"}; + cudf::test::structs_column_wrapper e({_a, _b, _c}, {1, 1, 1, 0, 1, 1, 1, 0}); + cudf::test::structs_column_wrapper s({a, b, c, d, e}, {1, 1, 0, 1, 1, 1, 1, 1}); + + auto split = cudf::split(s, {2, 5}); + + this->run_test(cudf::table_view({split[0]})); + this->run_test(cudf::table_view({split[1]})); + this->run_test(cudf::table_view({split[2]})); + } } TEST_F(PackUnpackTest, EmptyTable) @@ -490,6 +517,28 @@ TEST_F(PackUnpackTest, EmptyTable) } } +TEST_F(PackUnpackTest, SlicedEmpty) +{ + // empty sliced column. this is specifically testing the corner case: + // - a sliced column of size 0 + // - having children that are of size > 0 + // + cudf::test::strings_column_wrapper a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}; + cudf::test::lists_column_wrapper b{ + {0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}}; + cudf::test::fixed_width_column_wrapper c{0, 1, 2, 3, 4, 5, 6, 7}; + cudf::test::strings_column_wrapper _a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}; + cudf::test::lists_column_wrapper _b{ + {0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}}; + cudf::test::fixed_width_column_wrapper _c{0, 1, 2, 3, 4, 5, 6, 7}; + cudf::test::structs_column_wrapper d({_a, _b, _c}); + + cudf::table_view t({a, b, c, d}); + + auto sliced = cudf::split(t, {0}); + this->run_test(sliced[0]); +} + // clang-format on } // namespace test diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index 47ffe497ce3..de7770b4d0a 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -1525,12 +1525,14 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTable) auto col3 = cudf::make_structs_column( static_cast(col0).size(), std::move(children), 0, rmm::device_buffer{}); + cudf::table_view t({col0, col1, col2, *col3}); + auto pre_split = cudf::split(t, {1}); + { - cudf::table_view t({col0, col1, col2, *col3}); std::vector splits{1, 4}; - auto result = cudf::contiguous_split(t, splits); - auto expected = cudf::split(t, splits); + auto result = cudf::contiguous_split(pre_split[1], splits); + auto expected = cudf::split(pre_split[1], splits); for (size_t index = 0; index < expected.size(); index++) { CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table); @@ -1538,11 +1540,10 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTable) } { - cudf::table_view t({col0, col1, col2, *col3}); - std::vector splits{0, 6}; + std::vector splits{0, 5}; - auto result = cudf::contiguous_split(t, splits); - auto expected = cudf::split(t, splits); + auto result = cudf::contiguous_split(pre_split[1], splits); + auto expected = cudf::split(pre_split[1], splits); for (size_t index = 0; index < expected.size(); index++) { CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table); @@ -1593,6 +1594,118 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTableLarge) } } +TEST_F(ContiguousSplitTableCornerCases, PreSplitList) +{ + // list> + { + cudf::test::lists_column_wrapper list{{{1, 2}, {3, 4}}, + {{5, 6}, {7}, {8, 9, 10}}, + {{11, 12}, {13}}, + {{14, 15, 16}, {17, 18}, {}}, + {{-1, -2, -3}, {-4, -5, -6, -7}}, + {{-8, -9}, {-10, -11}}, + {{-12, -13}, {-14}, {-15, -16}}, + {{-17, -18}, {}, {-19, -20}}}; + auto pre_split = cudf::split(list, {2}); + + cudf::table_view t({pre_split[1]}); + auto result = cudf::contiguous_split(t, {3, 4}); + auto expected = cudf::split(t, {3, 4}); + + auto iter = thrust::make_counting_iterator(0); + std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) { + CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]); + }); + } + + // list> + { + cudf::test::fixed_width_column_wrapper offsets{ + 0, 2, 5, 7, 10, 12, 14, 17, 20}; + cudf::test::fixed_width_column_wrapper floats{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + cudf::test::structs_column_wrapper data({floats}); + + auto list = + cudf::make_lists_column(8, offsets.release(), data.release(), 0, rmm::device_buffer{}); + + auto pre_split = cudf::split(*list, {2}); + + cudf::table_view t({pre_split[1]}); + auto result = cudf::contiguous_split(t, {3, 4}); + auto expected = cudf::split(t, {3, 4}); + + auto iter = thrust::make_counting_iterator(0); + std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) { + CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]); + }); + } +} + +TEST_F(ContiguousSplitTableCornerCases, PreSplitStructs) +{ + // includes struct + { + cudf::test::fixed_width_column_wrapper a{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + cudf::test::fixed_width_column_wrapper b{{0, -1, -2, -3, -4, -5, -6, -7, -8, -9}, + {1, 1, 1, 0, 0, 0, 0, 1, 1, 1}}; + cudf::test::strings_column_wrapper c{ + {"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx", "yy", "zzzz"}, + {0, 0, 1, 1, 1, 1, 1, 1, 1, 1}}; + std::vector list_validity{1, 0, 1, 0, 1, 0, 1, 1, 1, 1}; + cudf::test::lists_column_wrapper d{ + {{0, 1}, {2, 3, 4}, {5, 6}, {7}, {8, 9, 10}, {11, 12}, {}, {15, 16, 17}, {18, 19}, {20}}, + list_validity.begin()}; + cudf::test::fixed_width_column_wrapper _a{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; + cudf::test::fixed_width_column_wrapper _b{ + -10, -20, -30, -40, -50, -60, -70, -80, -90, -100}; + cudf::test::strings_column_wrapper _c{ + "aa", "", "ccc", "dddd", "eeeee", "f", "gg", "hhh", "i", "jjj"}; + cudf::test::structs_column_wrapper e({_a, _b, _c}, {1, 1, 1, 0, 1, 1, 1, 0, 1, 1}); + cudf::test::structs_column_wrapper s({a, b, c, d, e}, {1, 1, 0, 1, 1, 1, 1, 1, 1, 1}); + + auto pre_split = cudf::split(s, {4}); + + auto iter = thrust::make_counting_iterator(0); + std::for_each(iter, iter + pre_split.size(), [&](cudf::size_type index) { + cudf::table_view t({pre_split[index]}); + auto result = cudf::contiguous_split(t, {1}); + auto expected = cudf::split(t, {1}); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result[0].table, expected[0]); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result[1].table, expected[1]); + }); + } + + // struct> + { + cudf::test::fixed_width_column_wrapper offsets{ + 0, 2, 5, 7, 10, 12, 14, 17, 20}; + cudf::test::fixed_width_column_wrapper floats{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + cudf::test::structs_column_wrapper data({floats}); + auto list = + cudf::make_lists_column(8, offsets.release(), data.release(), 0, rmm::device_buffer{}); + cudf::test::strings_column_wrapper strings{"a", "bb", "ccc", "dddd", "", "e", "ff", "ggg"}; + + std::vector> struct_children; + struct_children.push_back(std::move(list)); + struct_children.push_back(strings.release()); + cudf::test::structs_column_wrapper col(std::move(struct_children)); + + auto pre_split = cudf::split(col, {2}); + + cudf::table_view t({pre_split[1]}); + auto result = cudf::contiguous_split(t, {3, 4}); + auto expected = cudf::split(t, {3, 4}); + + auto iter = thrust::make_counting_iterator(0); + std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) { + CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]); + }); + } +} + TEST_F(ContiguousSplitTableCornerCases, NestedEmpty) { // this produces an empty strings column with no children, @@ -1687,7 +1800,40 @@ TEST_F(ContiguousSplitTableCornerCases, NestedEmpty) } } -TEST_F(ContiguousSplitTableCornerCases, MalformedColumns) {} +TEST_F(ContiguousSplitTableCornerCases, SplitEmpty) +{ + // empty sliced column. this is specifically testing the corner case: + // - a sliced column of size 0 + // - having children that are of size > 0 + // + cudf::test::strings_column_wrapper a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}; + cudf::test::lists_column_wrapper b{ + {0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}}; + cudf::test::fixed_width_column_wrapper c{0, 1, 2, 3, 4, 5, 6, 7}; + cudf::test::strings_column_wrapper _a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}; + cudf::test::lists_column_wrapper _b{ + {0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}}; + cudf::test::fixed_width_column_wrapper _c{0, 1, 2, 3, 4, 5, 6, 7}; + cudf::test::structs_column_wrapper d({_a, _b, _c}); + + cudf::table_view t({a, b, c, d}); + + auto sliced = cudf::split(t, {0}); + + { + auto result = cudf::contiguous_split(sliced[0], {}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(sliced[0], result[0].table); + } + + { + auto result = cudf::contiguous_split(sliced[0], {0}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(sliced[0], result[0].table); + } + + { + EXPECT_THROW(cudf::contiguous_split(sliced[0], {1}), cudf::logic_error); + } +} struct ContiguousSplitNestedTypesTest : public cudf::test::BaseFixture { };