diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 3aa85e87b1d..97490ee3e1c 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,6 +63,19 @@ std::unique_ptr sort_by_key( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::stable_sort_by_key + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr
stable_sort_by_key( + table_view const& values, + table_view const& keys, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::segmented_sorted_order * diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index c17abe8267d..ff334b9ee85 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,6 +145,36 @@ std::unique_ptr
sort_by_key( std::vector const& null_precedence = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Performs a key-value stable sort. + * + * Creates a new table that reorders the rows of `values` according to the + * lexicographic ordering of the rows of `keys`. + * + * The order of equivalent elements is guaranteed to be preserved. + * + * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. + * + * @param values The table to reorder + * @param keys The table that determines the ordering + * @param column_order The desired order for each column in `keys`. Size must be + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in + * ascending order. + * @param null_precedence The desired order of a null element compared to other + * elements for each column in `keys`. Size must be equal to + * `keys.num_columns()` or empty. If empty, all columns will be sorted with + * `null_order::BEFORE`. + * @param mr Device memory resource used to allocate the returned table's device memory + * @return The reordering of `values` determined by the lexicographic order of + * the rows of `keys`. + */ +std::unique_ptr
stable_sort_by_key( + table_view const& values, + table_view const& keys, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Computes the ranks of input column in sorted order. * diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu index 75335579de2..1d3734cace5 100644 --- a/cpp/src/sort/stable_sort.cu +++ b/cpp/src/sort/stable_sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include "sort_impl.cuh" #include +#include #include #include #include @@ -34,6 +35,26 @@ std::unique_ptr stable_sorted_order(table_view const& input, return sorted_order(input, column_order, null_precedence, stream, mr); } +std::unique_ptr
stable_sort_by_key(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(values.num_rows() == keys.num_rows(), + "Mismatch in number of rows for values and keys"); + + auto sorted_order = detail::stable_sorted_order( + keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); + + return detail::gather(values, + sorted_order->view(), + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); +} } // namespace detail std::unique_ptr stable_sorted_order(table_view const& input, @@ -45,4 +66,15 @@ std::unique_ptr stable_sorted_order(table_view const& input, input, column_order, null_precedence, rmm::cuda_stream_default, mr); } +std::unique_ptr
stable_sort_by_key(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::stable_sort_by_key( + values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr); +} + } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f96edd3ce5a..b1c23749c4b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -223,7 +223,10 @@ endif() # ################################################################################################## # * sort tests ------------------------------------------------------------------------------------ -ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp) +ConfigureTest( + SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/stable_sort_tests.cpp + sort/rank_test.cpp +) # ################################################################################################## # * copying tests --------------------------------------------------------------------------------- diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index af13c35acfb..7f9f40e98b8 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,14 +20,12 @@ #include #include -#include #include -#include #include #include #include -#include +#include #include namespace cudf { @@ -50,10 +48,8 @@ void run_sort_test(table_view input, CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view()); } -using TestTypes = cudf::test::Concat; +using TestTypes = cudf::test::Concat; // include timestamps and durations template struct Sort : public BaseFixture { @@ -555,7 +551,12 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) std::vector column_order{order::DESCENDING}; // desc_nulls_first - fixed_width_column_wrapper expected1{{3, 5, 6, 7, 2, 4, 1, 0}}; + auto const expected1 = []() { + if constexpr (std::is_same_v) { + return fixed_width_column_wrapper{{3, 5, 6, 7, 1, 2, 4, 0}}; + } + return fixed_width_column_wrapper{{3, 5, 6, 7, 2, 4, 1, 0}}; + }(); auto got = sorted_order(input, column_order, {null_order::AFTER}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); // Run test for sort and sort_by_key @@ -577,30 +578,18 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) run_sort_test(input, expected3, column_order2, {null_order::BEFORE}); // asce_nulls_last - fixed_width_column_wrapper expected4{{0, 1, 2, 4, 7, 6, 3, 5}}; + auto const expected4 = []() { + if constexpr (std::is_same_v) { + return fixed_width_column_wrapper{{0, 2, 4, 1, 7, 6, 3, 5}}; + } + return fixed_width_column_wrapper{{0, 1, 2, 4, 7, 6, 3, 5}}; + }(); got = sorted_order(input, column_order2, {null_order::AFTER}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); // Run test for sort and sort_by_key run_sort_test(input, expected4, column_order2, {null_order::AFTER}); } -TYPED_TEST(Sort, Stable) -{ - using T = TypeParam; - using R = int32_t; - - fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1}); - strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1}); - - fixed_width_column_wrapper expected{{4, 
3, 6, 1, 5, 7, 2, 0}}; - - auto got = stable_sorted_order(table_view({col1, col2}), - {order::ASCENDING, order::ASCENDING}, - {null_order::AFTER, null_order::BEFORE}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); -} - TYPED_TEST(Sort, MisMatchInColumnOrderSize) { using T = TypeParam; @@ -613,7 +602,6 @@ TYPED_TEST(Sort, MisMatchInColumnOrderSize) std::vector column_order{order::ASCENDING, order::DESCENDING}; EXPECT_THROW(sorted_order(input, column_order), logic_error); - EXPECT_THROW(stable_sorted_order(input, column_order), logic_error); EXPECT_THROW(sort(input, column_order), logic_error); EXPECT_THROW(sort_by_key(input, input, column_order), logic_error); } @@ -631,7 +619,6 @@ TYPED_TEST(Sort, MisMatchInNullPrecedenceSize) std::vector null_precedence{null_order::AFTER, null_order::BEFORE}; EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error); - EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error); EXPECT_THROW(sort(input, column_order, null_precedence), logic_error); EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error); } diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp new file mode 100644 index 00000000000..f80764e66a3 --- /dev/null +++ b/cpp/tests/sort/stable_sort_tests.cpp @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace cudf { +namespace test { +void run_stable_sort_test(table_view input, + column_view expected_sorted_indices, + std::vector column_order = {}, + std::vector null_precedence = {}) +{ + auto got_sort_by_key_table = sort_by_key(input, input, column_order, null_precedence); + auto expected_sort_by_key_table = gather(input, expected_sorted_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view()); +} + +using TestTypes = cudf::test::Concat; // include timestamps and durations + +template +struct StableSort : public BaseFixture { +}; + +TYPED_TEST_SUITE(StableSort, TestTypes); + +TYPED_TEST(StableSort, MixedNullOrder) +{ + using T = TypeParam; + using R = int32_t; + + fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1}); + strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1}); + + fixed_width_column_wrapper expected{{4, 3, 6, 1, 5, 7, 2, 0}}; + + auto got = stable_sorted_order(table_view({col1, col2}), + {order::ASCENDING, order::ASCENDING}, + {null_order::AFTER, null_order::BEFORE}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); +} + +TYPED_TEST(StableSort, WithNullMax) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); + fixed_width_column_wrapper col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}}; + table_view input{{col1, col2, col3}}; + + fixed_width_column_wrapper expected{{1, 0, 3, 5, 4, 2}}; + std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + std::vector null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER}; + + auto got = stable_sorted_order(input, column_order, null_precedence); + + if (not std::is_same_v) { + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + run_stable_sort_test(input, expected, column_order, null_precedence); + } else { + // for bools only validate that the null element landed at the back, since + // the rest of the values are equivalent; only the last index is compared. + auto to_host = [](column_view const& col) { + thrust::host_vector h_data(col.size()); + CUDA_TRY(cudaMemcpy( + h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); + return h_data; + }; + thrust::host_vector h_exp = to_host(expected); + thrust::host_vector h_got = to_host(got->view()); + EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]); + + fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}}; + run_stable_sort_test(input, expected_for_bool, column_order, null_precedence); + } +} + +TYPED_TEST(StableSort, WithNullMin) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1}); + fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}}; + table_view input{{col1, col2, col3}}; + + fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + + auto got = stable_sorted_order(input, column_order); + + if (!std::is_same_v) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + run_stable_sort_test(input, expected, column_order); + } else { + // for bools only validate that the null element landed at the front, since + // the rest of the values are equivalent; only the first index is compared. 
+ auto to_host = [](column_view const& col) { + thrust::host_vector h_data(col.size()); + CUDA_TRY(cudaMemcpy( + h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); + return h_data; + }; + thrust::host_vector h_exp = to_host(expected); + thrust::host_vector h_got = to_host(got->view()); + EXPECT_EQ(h_exp.front(), h_got.front()); + + fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + run_stable_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(StableSort, WithAllValid) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}}; + table_view input{{col1, col2, col3}}; + + fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + + auto got = stable_sorted_order(input, column_order); + + // Skip validating bools order. 
Valid true bools are all + // equivalent, and yield random order after thrust::sort + if (!std::is_same_v) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + run_stable_sort_test(input, expected, column_order); + } else { + fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + run_stable_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(StableSort, MisMatchInColumnOrderSize) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + table_view input{{col1, col2, col3}}; + + std::vector column_order{order::ASCENDING, order::DESCENDING}; + + EXPECT_THROW(stable_sorted_order(input, column_order), logic_error); + EXPECT_THROW(stable_sort_by_key(input, input, column_order), logic_error); +} + +TYPED_TEST(StableSort, MisMatchInNullPrecedenceSize) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + table_view input{{col1, col2, col3}}; + + std::vector column_order{order::ASCENDING, order::DESCENDING, order::DESCENDING}; + std::vector null_precedence{null_order::AFTER, null_order::BEFORE}; + + EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error); + EXPECT_THROW(stable_sort_by_key(input, input, column_order, null_precedence), logic_error); +} + +TYPED_TEST(StableSort, ZeroSizedColumns) +{ + using T = TypeParam; + + fixed_width_column_wrapper col1{}; + table_view input{{col1}}; + + fixed_width_column_wrapper expected{}; + std::vector column_order{order::ASCENDING}; + + auto got = stable_sorted_order(input, column_order); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + run_stable_sort_test(input, expected, column_order); +} + +struct StableSortByKey : public BaseFixture { +}; + +TEST_F(StableSortByKey, 
ValueKeysSizeMismatch) +{ + using T = int64_t; + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + table_view values{{col1, col2, col3}}; + + fixed_width_column_wrapper key_col{{5, 4, 3, 5}}; + table_view keys{{key_col}}; + + EXPECT_THROW(stable_sort_by_key(values, keys), logic_error); +} + +template +struct StableSortFixedPoint : public cudf::test::BaseFixture { +}; + +template +using wrapper = cudf::test::fixed_width_column_wrapper; +TYPED_TEST_SUITE(StableSortFixedPoint, cudf::test::FixedPointTypes); + +TYPED_TEST(StableSortFixedPoint, FixedPointSortedOrderGather) +{ + using namespace numeric; + using decimalXX = TypeParam; + + auto const ZERO = decimalXX{0, scale_type{0}}; + auto const ONE = decimalXX{1, scale_type{0}}; + auto const TWO = decimalXX{2, scale_type{0}}; + auto const THREE = decimalXX{3, scale_type{0}}; + auto const FOUR = decimalXX{4, scale_type{0}}; + + auto const input_vec = std::vector{THREE, TWO, ONE, ZERO, FOUR, THREE}; + auto const index_vec = std::vector{3, 2, 1, 0, 5, 4}; + auto const sorted_vec = std::vector{ZERO, ONE, TWO, THREE, THREE, FOUR}; + + auto const input_col = wrapper(input_vec.begin(), input_vec.end()); + auto const index_col = wrapper(index_vec.begin(), index_vec.end()); + auto const sorted_col = wrapper(sorted_vec.begin(), sorted_vec.end()); + + auto const sorted_table = cudf::table_view{{sorted_col}}; + auto const input_table = cudf::table_view{{input_col}}; + + auto const indices = cudf::stable_sorted_order(input_table); + auto const sorted = cudf::gather(input_table, indices->view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(index_col, indices->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view()); +} + +} // namespace test +} // namespace cudf