Skip to content

Commit

Permalink
Add cudf::stable_sort_by_key (#10387)
Browse files Browse the repository at this point in the history
This PR adds a new `stable_sort_by_key` API to libcudf. The new API helps simplify the Cython/JNI bindings of `drop_duplicates` (#10370).

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - David Wendt (https://github.com/davidwendt)

URL: #10387
  • Loading branch information
PointKernel authored Mar 7, 2022
1 parent a584cdc commit 4f8c60a
Show file tree
Hide file tree
Showing 6 changed files with 371 additions and 33 deletions.
15 changes: 14 additions & 1 deletion cpp/include/cudf/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,6 +63,19 @@ std::unique_ptr<table> sort_by_key(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::stable_sort_by_key
*
* Detail-level overload: takes the same arguments as the public API plus an
* explicit CUDA stream, which defaults to `rmm::cuda_stream_default`.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> stable_sort_by_key(
table_view const& values,
table_view const& keys,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sorted_order
*
Expand Down
32 changes: 31 additions & 1 deletion cpp/include/cudf/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -145,6 +145,36 @@ std::unique_ptr<table> sort_by_key(
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a key-value stable sort.
*
* Creates a new table that reorders the rows of `values` according to the
* lexicographic ordering of the rows of `keys`.
*
* The sort is stable: rows of `values` whose corresponding rows in `keys`
* compare equal keep the relative order they had in the input.
*
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
*
* @param values The table to reorder
* @param keys The table that determines the ordering
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
* `keys.num_columns()` or empty. If empty, all columns will be sorted with
* `null_order::BEFORE`.
* @param mr Device memory resource used to allocate the returned table's device memory
* @return The reordering of `values` determined by the lexicographic order of
* the rows of `keys`.
*/
std::unique_ptr<table> stable_sort_by_key(
table_view const& values,
table_view const& keys,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the ranks of input column in sorted order.
*
Expand Down
34 changes: 33 additions & 1 deletion cpp/src/sort/stable_sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include "sort_impl.cuh"

#include <cudf/column/column.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
Expand All @@ -34,6 +35,26 @@ std::unique_ptr<column> stable_sorted_order(table_view const& input,
return sorted_order<true>(input, column_order, null_precedence, stream, mr);
}

// Reorders the rows of `values` by the stable lexicographic order of the rows of `keys`.
//
// Throws (via CUDF_EXPECTS) when `values` and `keys` have different row counts.
// All work is issued on `stream`; only the returned table is allocated from `mr`.
std::unique_ptr<table> stable_sort_by_key(table_view const& values,
                                          table_view const& keys,
                                          std::vector<order> const& column_order,
                                          std::vector<null_order> const& null_precedence,
                                          rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_EXPECTS(values.num_rows() == keys.num_rows(),
               "Mismatch in number of rows for values and keys");

  // The gather map is a local temporary that never leaves this function, so it is
  // allocated from the current device resource instead of the caller-supplied `mr`.
  auto* const scratch_mr = rmm::mr::get_current_device_resource();
  auto const gather_map =
    detail::stable_sorted_order(keys, column_order, null_precedence, stream, scratch_mr);

  // Bounds checking is disabled and negative indices are declared not allowed for
  // this gather; the map comes straight from stable_sorted_order over `keys`.
  return detail::gather(values,
                        gather_map->view(),
                        out_of_bounds_policy::DONT_CHECK,
                        detail::negative_index_policy::NOT_ALLOWED,
                        stream,
                        mr);
}
} // namespace detail

std::unique_ptr<column> stable_sorted_order(table_view const& input,
Expand All @@ -45,4 +66,15 @@ std::unique_ptr<column> stable_sorted_order(table_view const& input,
input, column_order, null_precedence, rmm::cuda_stream_default, mr);
}

// Public entry point: opens a profiling range for the call and forwards to the
// detail implementation on the default CUDA stream.
std::unique_ptr<table> stable_sort_by_key(table_view const& values,
                                          table_view const& keys,
                                          std::vector<order> const& column_order,
                                          std::vector<null_order> const& null_precedence,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto sorted_values = detail::stable_sort_by_key(
    values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr);
  return sorted_values;
}

} // namespace cudf
5 changes: 4 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,10 @@ endif()

# ##################################################################################################
# * sort tests ------------------------------------------------------------------------------------
ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp)
# One source file per line so that adding or removing a sort test is a one-line change.
ConfigureTest(
  SORT_TEST
  sort/segmented_sort_tests.cpp
  sort/sort_test.cpp
  sort/stable_sort_tests.cpp
  sort/rank_test.cpp
)

# ##################################################################################################
# * copying tests ---------------------------------------------------------------------------------
Expand Down
45 changes: 16 additions & 29 deletions cpp/tests/sort/sort_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,12 @@
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <type_traits>
#include <vector>

namespace cudf {
Expand All @@ -50,10 +48,8 @@ void run_sort_test(table_view input,
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
}

// Element types for the typed sort tests (presumably the list the `Sort` fixture is
// instantiated with): non-bool integral, floating-point, duration and timestamp types.
using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
cudf::test::FloatingPointTypes,
cudf::test::DurationTypes,
cudf::test::TimestampTypes>;
// Element types for the typed sort tests (presumably the list the `Sort` fixture is
// instantiated with).
using TestTypes = cudf::test::Concat<cudf::test::NumericTypes, // includes integers, floats and bool
cudf::test::ChronoTypes>; // includes timestamps and durations

template <typename T>
struct Sort : public BaseFixture {
Expand Down Expand Up @@ -555,7 +551,12 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
std::vector<order> column_order{order::DESCENDING};

// desc_nulls_first
fixed_width_column_wrapper<int32_t> expected1{{3, 5, 6, 7, 2, 4, 1, 0}};
auto const expected1 = []() {
if constexpr (std::is_same_v<T, bool>) {
return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 1, 2, 4, 0}};
}
return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 2, 4, 1, 0}};
}();
auto got = sorted_order(input, column_order, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
// Run test for sort and sort_by_key
Expand All @@ -577,30 +578,18 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
run_sort_test(input, expected3, column_order2, {null_order::BEFORE});

// asce_nulls_last
fixed_width_column_wrapper<int32_t> expected4{{0, 1, 2, 4, 7, 6, 3, 5}};
auto const expected4 = []() {
if constexpr (std::is_same_v<T, bool>) {
return fixed_width_column_wrapper<int32_t>{{0, 2, 4, 1, 7, 6, 3, 5}};
}
return fixed_width_column_wrapper<int32_t>{{0, 1, 2, 4, 7, 6, 3, 5}};
}();
got = sorted_order(input, column_order2, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected4, column_order2, {null_order::AFTER});
}

// Checks that stable_sorted_order keeps rows with equal keys in their input order.
TYPED_TEST(Sort, Stable)
{
  using T = TypeParam;

  // Primary key: row 0 is null (validity 0); every other row holds 0 or 1.
  fixed_width_column_wrapper<T> primary_key({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1});
  // Secondary key: row 4 is null; rows {3, 6} and rows {1, 5, 7} are duplicates
  // within the same primary-key group.
  strings_column_wrapper secondary_key({"2", "a", "b", "x", "k", "a", "x", "a"},
                                       {1, 1, 1, 1, 0, 1, 1, 1});

  std::vector<order> const column_order{order::ASCENDING, order::ASCENDING};
  std::vector<null_order> const null_precedence{null_order::AFTER, null_order::BEFORE};

  auto const result =
    stable_sorted_order(table_view({primary_key, secondary_key}), column_order, null_precedence);

  // Primary key 0: the null string (row 4) sorts first, then the tied "x" rows 3, 6.
  // Primary key 1: the tied "a" rows 1, 5, 7 precede "b" (row 2).
  // The row with the null primary key (row 0) sorts last.
  fixed_width_column_wrapper<int32_t> expected_order{{4, 3, 6, 1, 5, 7, 2, 0}};

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_order, result->view());
}

TYPED_TEST(Sort, MisMatchInColumnOrderSize)
{
using T = TypeParam;
Expand All @@ -613,7 +602,6 @@ TYPED_TEST(Sort, MisMatchInColumnOrderSize)
std::vector<order> column_order{order::ASCENDING, order::DESCENDING};

EXPECT_THROW(sorted_order(input, column_order), logic_error);
EXPECT_THROW(stable_sorted_order(input, column_order), logic_error);
EXPECT_THROW(sort(input, column_order), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order), logic_error);
}
Expand All @@ -631,7 +619,6 @@ TYPED_TEST(Sort, MisMatchInNullPrecedenceSize)
std::vector<null_order> null_precedence{null_order::AFTER, null_order::BEFORE};

EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error);
EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error);
}
Expand Down
Loading

0 comments on commit 4f8c60a

Please sign in to comment.