diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp
index 3aa85e87b1d..97490ee3e1c 100644
--- a/cpp/include/cudf/detail/sorting.hpp
+++ b/cpp/include/cudf/detail/sorting.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -63,6 +63,19 @@ std::unique_ptr
sort_by_key(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @copydoc cudf::stable_sort_by_key
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr stable_sort_by_key(
+ table_view const& values,
+ table_view const& keys,
+ std::vector const& column_order = {},
+ std::vector const& null_precedence = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/**
* @copydoc cudf::segmented_sorted_order
*
diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index c17abe8267d..ff334b9ee85 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -145,6 +145,36 @@ std::unique_ptr sort_by_key(
std::vector const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @brief Performs a key-value stable sort.
+ *
+ * Creates a new table that reorders the rows of `values` according to the
+ * lexicographic ordering of the rows of `keys`.
+ *
+ * Rows of `values` whose `keys` rows compare equivalent keep their original relative order.
+ *
+ * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
+ *
+ * @param values The table to reorder
+ * @param keys The table that determines the ordering
+ * @param column_order The desired order for each column in `keys`. Size must be
+ * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
+ * ascending order.
+ * @param null_precedence The desired order of a null element compared to other
+ * elements for each column in `keys`. Size must be equal to
+ * `keys.num_columns()` or empty. If empty, all columns will be sorted with
+ * `null_order::BEFORE`.
+ * @param mr Device memory resource used to allocate the returned table's device memory
+ * @return The reordering of `values` determined by the lexicographic order of
+ * the rows of `keys`.
+ */
+std::unique_ptr stable_sort_by_key(
+ table_view const& values,
+ table_view const& keys,
+ std::vector const& column_order = {},
+ std::vector const& null_precedence = {},
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/**
* @brief Computes the ranks of input column in sorted order.
*
diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu
index 75335579de2..1d3734cace5 100644
--- a/cpp/src/sort/stable_sort.cu
+++ b/cpp/src/sort/stable_sort.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
#include "sort_impl.cuh"
#include
+#include
#include
#include
#include
@@ -34,6 +35,26 @@ std::unique_ptr stable_sorted_order(table_view const& input,
return sorted_order(input, column_order, null_precedence, stream, mr);
}
+std::unique_ptr stable_sort_by_key(table_view const& values,
+ table_view const& keys,
+ std::vector const& column_order,
+ std::vector const& null_precedence,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_EXPECTS(values.num_rows() == keys.num_rows(),
+ "Mismatch in number of rows for values and keys");
+ // Temporary row order of `keys`: allocated from the current device resource rather than `mr`.
+ auto sorted_order = detail::stable_sorted_order(
+ keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource());
+ // Only the gathered table is returned to the caller, so only the gather allocates from `mr`.
+ return detail::gather(values,
+ sorted_order->view(),
+ out_of_bounds_policy::DONT_CHECK,
+ detail::negative_index_policy::NOT_ALLOWED,
+ stream,
+ mr);
+}
} // namespace detail
std::unique_ptr stable_sorted_order(table_view const& input,
@@ -45,4 +66,15 @@ std::unique_ptr stable_sorted_order(table_view const& input,
input, column_order, null_precedence, rmm::cuda_stream_default, mr);
}
+std::unique_ptr stable_sort_by_key(table_view const& values,
+ table_view const& keys,
+ std::vector const& column_order,
+ std::vector const& null_precedence,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::stable_sort_by_key(
+ values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr);
+}
+
} // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index f96edd3ce5a..b1c23749c4b 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -223,7 +223,10 @@ endif()
# ##################################################################################################
# * sort tests ------------------------------------------------------------------------------------
-ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp)
+ConfigureTest(
+ SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/stable_sort_tests.cpp
+ sort/rank_test.cpp
+)
# ##################################################################################################
# * copying tests ---------------------------------------------------------------------------------
diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp
index af13c35acfb..7f9f40e98b8 100644
--- a/cpp/tests/sort/sort_test.cpp
+++ b/cpp/tests/sort/sort_test.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,14 +20,12 @@
#include
#include
-#include
#include
-#include
#include
#include
#include
-#include
+#include
#include
namespace cudf {
@@ -50,10 +48,8 @@ void run_sort_test(table_view input,
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
}
-using TestTypes = cudf::test::Concat;
+using TestTypes = cudf::test::Concat; // include timestamps and durations
template
struct Sort : public BaseFixture {
@@ -555,7 +551,12 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
std::vector column_order{order::DESCENDING};
// desc_nulls_first
- fixed_width_column_wrapper expected1{{3, 5, 6, 7, 2, 4, 1, 0}};
+ auto const expected1 = []() {
+ if constexpr (std::is_same_v) {
+ return fixed_width_column_wrapper{{3, 5, 6, 7, 1, 2, 4, 0}};
+ }
+ return fixed_width_column_wrapper{{3, 5, 6, 7, 2, 4, 1, 0}};
+ }();
auto got = sorted_order(input, column_order, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
// Run test for sort and sort_by_key
@@ -577,30 +578,18 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
run_sort_test(input, expected3, column_order2, {null_order::BEFORE});
// asce_nulls_last
- fixed_width_column_wrapper expected4{{0, 1, 2, 4, 7, 6, 3, 5}};
+ auto const expected4 = []() {
+ if constexpr (std::is_same_v) {
+ return fixed_width_column_wrapper{{0, 2, 4, 1, 7, 6, 3, 5}};
+ }
+ return fixed_width_column_wrapper{{0, 1, 2, 4, 7, 6, 3, 5}};
+ }();
got = sorted_order(input, column_order2, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected4, column_order2, {null_order::AFTER});
}
-TYPED_TEST(Sort, Stable)
-{
- using T = TypeParam;
- using R = int32_t;
-
- fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1});
- strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1});
-
- fixed_width_column_wrapper expected{{4, 3, 6, 1, 5, 7, 2, 0}};
-
- auto got = stable_sorted_order(table_view({col1, col2}),
- {order::ASCENDING, order::ASCENDING},
- {null_order::AFTER, null_order::BEFORE});
-
- CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
-}
-
TYPED_TEST(Sort, MisMatchInColumnOrderSize)
{
using T = TypeParam;
@@ -613,7 +602,6 @@ TYPED_TEST(Sort, MisMatchInColumnOrderSize)
std::vector column_order{order::ASCENDING, order::DESCENDING};
EXPECT_THROW(sorted_order(input, column_order), logic_error);
- EXPECT_THROW(stable_sorted_order(input, column_order), logic_error);
EXPECT_THROW(sort(input, column_order), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order), logic_error);
}
@@ -631,7 +619,6 @@ TYPED_TEST(Sort, MisMatchInNullPrecedenceSize)
std::vector null_precedence{null_order::AFTER, null_order::BEFORE};
EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error);
- EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error);
}
diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp
new file mode 100644
index 00000000000..f80764e66a3
--- /dev/null
+++ b/cpp/tests/sort/stable_sort_tests.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace cudf {
+namespace test {
+void run_stable_sort_test(table_view input,
+ column_view expected_sorted_indices,
+ std::vector column_order = {},
+ std::vector null_precedence = {})
+{
+ auto got_sort_by_key_table = stable_sort_by_key(input, input, column_order, null_precedence);
+ auto expected_sort_by_key_table = gather(input, expected_sorted_indices);
+ // Helper contract: `input` gathered by the expected indices equals stable_sort_by_key's output.
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
+}
+
+using TestTypes = cudf::test::Concat; // include timestamps and durations
+
+template
+struct StableSort : public BaseFixture {
+};
+
+TYPED_TEST_SUITE(StableSort, TestTypes);
+
+TYPED_TEST(StableSort, MixedNullOrder)
+{
+ using T = TypeParam;
+ using R = int32_t;
+ // Second initializer list is the validity mask (0 presumably marks a null): col1 row 0, col2 row 4.
+ fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1});
+ strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1});
+ // Rows (3, 6) and (1, 5, 7) have equal keys; the expected order keeps them in input order.
+ fixed_width_column_wrapper expected{{4, 3, 6, 1, 5, 7, 2, 0}};
+ // Expected order has the col1-null row (0) last and the col2-null row (4) first in its group.
+ auto got = stable_sorted_order(table_view({col1, col2}),
+ {order::ASCENDING, order::ASCENDING},
+ {null_order::AFTER, null_order::BEFORE});
+
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
+}
+
+TYPED_TEST(StableSort, WithNullMax)
+{
+ using T = TypeParam;
+ // Row 2 has validity 0 in every column; rows 0, 3 and 5 are identical.
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
+ fixed_width_column_wrapper col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}};
+ table_view input{{col1, col2, col3}};
+ // Expected order for non-bool T: duplicate rows 0, 3, 5 stay in input order, null row 2 last.
+ fixed_width_column_wrapper expected{{1, 0, 3, 5, 4, 2}};
+ std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+ std::vector null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER};
+
+ auto got = stable_sorted_order(input, column_order, null_precedence);
+
+ if (not std::is_same_v) {
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
+
+ run_stable_sort_test(input, expected, column_order, null_precedence);
+ } else {
+ // for bools only the last entry of `expected` (the null row) is compared against `got`;
+ // the full row order is then checked on the sorted table via `expected_for_bool`.
+ auto to_host = [](column_view const& col) {
+ thrust::host_vector h_data(col.size());
+ CUDA_TRY(cudaMemcpy(
+ h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
+ return h_data;
+ };
+ thrust::host_vector h_exp = to_host(expected);
+ thrust::host_vector h_got = to_host(got->view());
+ EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]);
+
+ fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}};
+ run_stable_sort_test(input, expected_for_bool, column_order, null_precedence);
+ }
+}
+
+TYPED_TEST(StableSort, WithNullMin)
+{
+ using T = TypeParam;
+ // Row 2 has validity 0 in every column; rows 0 and 3 are identical.
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
+ fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}};
+ table_view input{{col1, col2, col3}};
+ // null_precedence is left at its default; the expected order puts the null row (2) first.
+ fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}};
+ std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+
+ auto got = stable_sorted_order(input, column_order);
+
+ if (!std::is_same_v) {
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
+
+ run_stable_sort_test(input, expected, column_order);
+ } else {
+ // for bools only the first entry of `expected` (the null row) is compared against `got`;
+ // the full row order is then checked on the sorted table via `expected_for_bool`.
+ auto to_host = [](column_view const& col) {
+ thrust::host_vector h_data(col.size());
+ CUDA_TRY(cudaMemcpy(
+ h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
+ return h_data;
+ };
+ thrust::host_vector h_exp = to_host(expected);
+ thrust::host_vector h_got = to_host(got->view());
+ EXPECT_EQ(h_exp.front(), h_got.front());
+
+ fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}};
+ run_stable_sort_test(input, expected_for_bool, column_order);
+ }
+}
+
+TYPED_TEST(StableSort, WithAllValid)
+{
+ using T = TypeParam;
+ // No validity masks are supplied here; rows 0 and 3 are identical.
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+ fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}};
+ table_view input{{col1, col2, col3}};
+
+ fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}};
+ std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+
+ auto got = stable_sorted_order(input, column_order);
+
+ // `expected` is the order for non-bool T only. For bools the index column is not compared;
+ // the sorted table is checked against `expected_for_bool` instead.
+ if (!std::is_same_v) {
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
+
+ run_stable_sort_test(input, expected, column_order);
+ } else {
+ fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}};
+ run_stable_sort_test(input, expected_for_bool, column_order);
+ }
+}
+
+TYPED_TEST(StableSort, MisMatchInColumnOrderSize)
+{
+ using T = TypeParam;
+
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+ fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}};
+ table_view input{{col1, col2, col3}};
+ // Two orders for three key columns: both stable APIs are expected to reject the mismatch.
+ std::vector column_order{order::ASCENDING, order::DESCENDING};
+
+ EXPECT_THROW(stable_sorted_order(input, column_order), logic_error);
+ EXPECT_THROW(stable_sort_by_key(input, input, column_order), logic_error);
+}
+
+TYPED_TEST(StableSort, MisMatchInNullPrecedenceSize)
+{
+ using T = TypeParam;
+
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+ fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}};
+ table_view input{{col1, col2, col3}};
+ // Three column orders match the three key columns, but only two null precedences are given.
+ std::vector column_order{order::ASCENDING, order::DESCENDING, order::DESCENDING};
+ std::vector null_precedence{null_order::AFTER, null_order::BEFORE};
+
+ EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error);
+ EXPECT_THROW(stable_sort_by_key(input, input, column_order, null_precedence), logic_error);
+}
+
+TYPED_TEST(StableSort, ZeroSizedColumns)
+{
+ using T = TypeParam;
+ // A single empty key column: the expected order, and the sorted table, are empty as well.
+ fixed_width_column_wrapper col1{};
+ table_view input{{col1}};
+
+ fixed_width_column_wrapper expected{};
+ std::vector column_order{order::ASCENDING};
+
+ auto got = stable_sorted_order(input, column_order);
+
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
+
+ run_stable_sort_test(input, expected, column_order);
+}
+
+struct StableSortByKey : public BaseFixture {
+};
+
+TEST_F(StableSortByKey, ValueKeysSizeMismatch)
+{
+ using T = int64_t;
+
+ fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}};
+ strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+ fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}};
+ table_view values{{col1, col2, col3}};
+ // `values` has 5 rows but the key column only 4, so the call is expected to throw.
+ fixed_width_column_wrapper key_col{{5, 4, 3, 5}};
+ table_view keys{{key_col}};
+
+ EXPECT_THROW(stable_sort_by_key(values, keys), logic_error);
+}
+
+template
+struct StableSortFixedPoint : public cudf::test::BaseFixture {
+};
+
+template
+using wrapper = cudf::test::fixed_width_column_wrapper;
+TYPED_TEST_SUITE(StableSortFixedPoint, cudf::test::FixedPointTypes);
+
+TYPED_TEST(StableSortFixedPoint, FixedPointSortedOrderGather)
+{
+ using namespace numeric;
+ using decimalXX = TypeParam;
+
+ auto const ZERO = decimalXX{0, scale_type{0}};
+ auto const ONE = decimalXX{1, scale_type{0}};
+ auto const TWO = decimalXX{2, scale_type{0}};
+ auto const THREE = decimalXX{3, scale_type{0}};
+ auto const FOUR = decimalXX{4, scale_type{0}};
+ // THREE appears at rows 0 and 5; `index_vec` lists row 0 before row 5, i.e. input order.
+ auto const input_vec = std::vector{THREE, TWO, ONE, ZERO, FOUR, THREE};
+ auto const index_vec = std::vector{3, 2, 1, 0, 5, 4};
+ auto const sorted_vec = std::vector{ZERO, ONE, TWO, THREE, THREE, FOUR};
+
+ auto const input_col = wrapper(input_vec.begin(), input_vec.end());
+ auto const index_col = wrapper(index_vec.begin(), index_vec.end());
+ auto const sorted_col = wrapper(sorted_vec.begin(), sorted_vec.end());
+
+ auto const sorted_table = cudf::table_view{{sorted_col}};
+ auto const input_table = cudf::table_view{{input_col}};
+ // Use the stable API: the tie order asserted through `index_col` is only guaranteed by it.
+ auto const indices = cudf::stable_sorted_order(input_table);
+ auto const sorted = cudf::gather(input_table, indices->view());
+
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(index_col, indices->view());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view());
+}
+
+} // namespace test
+} // namespace cudf