Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cudf::stable_sort_by_key #10387

Merged
merged 9 commits into from
Mar 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion cpp/include/cudf/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,6 +63,19 @@ std::unique_ptr<table> sort_by_key(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::stable_sort_by_key
*
* Detail-level overload of the public API that additionally accepts the CUDA
* stream to run on, so internal callers are not tied to the default stream.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> stable_sort_by_key(
table_view const& values,
table_view const& keys,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sorted_order
*
Expand Down
32 changes: 31 additions & 1 deletion cpp/include/cudf/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -145,6 +145,36 @@ std::unique_ptr<table> sort_by_key(
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a key-value stable sort.
*
* Creates a new table that reorders the rows of `values` according to the
* lexicographic ordering of the rows of `keys`.
*
* The sort is stable: rows of `keys` that compare equivalent keep, in the
* result, the relative order they had in the input.
*
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
*
* @param values The table to reorder. Must have the same number of rows as `keys`.
* @param keys The table that determines the ordering
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
* `keys.num_columns()` or empty. If empty, all columns will be sorted with
* `null_order::BEFORE`.
* @param mr Device memory resource used to allocate the returned table's device memory
* @return The reordering of `values` determined by the lexicographic order of
* the rows of `keys`.
*/
std::unique_ptr<table> stable_sort_by_key(
table_view const& values,
table_view const& keys,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the ranks of input column in sorted order.
*
Expand Down
34 changes: 33 additions & 1 deletion cpp/src/sort/stable_sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include "sort_impl.cuh"

#include <cudf/column/column.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
Expand All @@ -34,6 +35,26 @@ std::unique_ptr<column> stable_sorted_order(table_view const& input,
return sorted_order<true>(input, column_order, null_precedence, stream, mr);
}

/**
 * @brief Stream-aware implementation of the stable key-value sort.
 *
 * Computes the stable sorted order of `keys` and then gathers the rows of
 * `values` through that order.
 *
 * @throws cudf::logic_error if `values` and `keys` differ in row count.
 *
 * @param values          Table whose rows are reordered.
 * @param keys            Table whose rows determine the ordering.
 * @param column_order    Per-column sort direction, forwarded to `stable_sorted_order`.
 * @param null_precedence Per-column null placement, forwarded to `stable_sorted_order`.
 * @param stream          CUDA stream passed to both the sort and the gather.
 * @param mr              Memory resource passed to the gather that builds the result.
 * @return The gathered (reordered) copy of `values`.
 */
std::unique_ptr<table> stable_sort_by_key(table_view const& values,
                                          table_view const& keys,
                                          std::vector<order> const& column_order,
                                          std::vector<null_order> const& null_precedence,
                                          rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_EXPECTS(values.num_rows() == keys.num_rows(),
               "Mismatch in number of rows for values and keys");

  // The row permutation is only an intermediate of this function, so it is
  // requested from the current device resource instead of the caller's `mr`.
  auto* const scratch_mr = rmm::mr::get_current_device_resource();
  auto const gather_map =
    detail::stable_sorted_order(keys, column_order, null_precedence, stream, scratch_mr);

  // Bounds checking is skipped and negative indices are disallowed for the
  // gather map produced above.
  return detail::gather(values,
                        gather_map->view(),
                        out_of_bounds_policy::DONT_CHECK,
                        detail::negative_index_policy::NOT_ALLOWED,
                        stream,
                        mr);
}
} // namespace detail

std::unique_ptr<column> stable_sorted_order(table_view const& input,
Expand All @@ -45,4 +66,15 @@ std::unique_ptr<column> stable_sorted_order(table_view const& input,
input, column_order, null_precedence, rmm::cuda_stream_default, mr);
}

/*
 * Public entry point for the stable key-value sort: opens the function's
 * profiling range and forwards every argument to the detail implementation,
 * supplying the default CUDA stream.
 */
std::unique_ptr<table> stable_sort_by_key(table_view const& values,
                                          table_view const& keys,
                                          std::vector<order> const& column_order,
                                          std::vector<null_order> const& null_precedence,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();

  // This overload exposes no stream parameter, so the default stream is used.
  rmm::cuda_stream_view const stream = rmm::cuda_stream_default;
  return detail::stable_sort_by_key(values, keys, column_order, null_precedence, stream, mr);
}

} // namespace cudf
5 changes: 4 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,10 @@ endif()

# ##################################################################################################
# * sort tests ------------------------------------------------------------------------------------
ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp)
ConfigureTest(
SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/stable_sort_tests.cpp
sort/rank_test.cpp
)

# ##################################################################################################
# * copying tests ---------------------------------------------------------------------------------
Expand Down
45 changes: 16 additions & 29 deletions cpp/tests/sort/sort_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,12 @@
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <type_traits>
#include <vector>

namespace cudf {
Expand All @@ -50,10 +48,8 @@ void run_sort_test(table_view input,
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
}

using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
cudf::test::FloatingPointTypes,
cudf::test::DurationTypes,
cudf::test::TimestampTypes>;
using TestTypes = cudf::test::Concat<cudf::test::NumericTypes, // include integers, floats and bool
cudf::test::ChronoTypes>; // include timestamps and durations

template <typename T>
struct Sort : public BaseFixture {
Expand Down Expand Up @@ -555,7 +551,12 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
std::vector<order> column_order{order::DESCENDING};

// desc_nulls_first
fixed_width_column_wrapper<int32_t> expected1{{3, 5, 6, 7, 2, 4, 1, 0}};
auto const expected1 = []() {
if constexpr (std::is_same_v<T, bool>) {
return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 1, 2, 4, 0}};
}
return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 2, 4, 1, 0}};
}();
auto got = sorted_order(input, column_order, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
// Run test for sort and sort_by_key
Expand All @@ -577,30 +578,18 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
run_sort_test(input, expected3, column_order2, {null_order::BEFORE});

// asce_nulls_last
fixed_width_column_wrapper<int32_t> expected4{{0, 1, 2, 4, 7, 6, 3, 5}};
auto const expected4 = []() {
if constexpr (std::is_same_v<T, bool>) {
return fixed_width_column_wrapper<int32_t>{{0, 2, 4, 1, 7, 6, 3, 5}};
}
return fixed_width_column_wrapper<int32_t>{{0, 1, 2, 4, 7, 6, 3, 5}};
}();
got = sorted_order(input, column_order2, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected4, column_order2, {null_order::AFTER});
}

// Checks that stable_sorted_order keeps the input order of rows whose keys tie.
TYPED_TEST(Sort, Stable)
{
  using T = TypeParam;
  using R = int32_t;

  // Assuming the second initializer list of each wrapper is its validity mask
  // (TODO confirm): row 0 of the first key and row 4 of the second key are null.
  fixed_width_column_wrapper<T> primary_key({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1});
  strings_column_wrapper secondary_key({"2", "a", "b", "x", "k", "a", "x", "a"},
                                       {1, 1, 1, 1, 0, 1, 1, 1});

  table_view const keys({primary_key, secondary_key});
  std::vector<order> const column_order{order::ASCENDING, order::ASCENDING};
  std::vector<null_order> const null_precedence{null_order::AFTER, null_order::BEFORE};

  auto const got = stable_sorted_order(keys, column_order, null_precedence);

  // Rows 3 and 6, and rows 1, 5 and 7, hold identical key pairs; the expected
  // permutation lists each of those groups in input order.
  fixed_width_column_wrapper<R> expected{{4, 3, 6, 1, 5, 7, 2, 0}};
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
}

TYPED_TEST(Sort, MisMatchInColumnOrderSize)
{
using T = TypeParam;
Expand All @@ -613,7 +602,6 @@ TYPED_TEST(Sort, MisMatchInColumnOrderSize)
std::vector<order> column_order{order::ASCENDING, order::DESCENDING};

EXPECT_THROW(sorted_order(input, column_order), logic_error);
EXPECT_THROW(stable_sorted_order(input, column_order), logic_error);
EXPECT_THROW(sort(input, column_order), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order), logic_error);
}
Expand All @@ -631,7 +619,6 @@ TYPED_TEST(Sort, MisMatchInNullPrecedenceSize)
std::vector<null_order> null_precedence{null_order::AFTER, null_order::BEFORE};

EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error);
EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort(input, column_order, null_precedence), logic_error);
EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error);
}
Expand Down
Loading