Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cudf::detail::sorted_order #13062

Merged
merged 16 commits into from
Apr 12, 2023
7 changes: 4 additions & 3 deletions cpp/src/sort/sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,15 +14,16 @@
* limitations under the License.
*/

#include <sort/sort_impl.cuh>
ttnghia marked this conversation as resolved.
Show resolved Hide resolved

#include <cudf/column/column.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <sort/sort_impl.cuh>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/functional.h>
Expand Down
87 changes: 69 additions & 18 deletions cpp/src/sort/sort_column_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,14 +16,68 @@

#pragma once

#include <sort/sort_impl.cuh>
#include <cudf/column/column_device_view.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sequence.h>
#include <thrust/sort.h>

namespace cudf {
namespace detail {

/**
* @brief Sort indices of a single column.
*
* This API offers fast sorting for primitive types. It cannot handle nested types and will not
* consider `NaN` as equivalent to other `NaN`.
*
* @tparam stable Whether to use stable sort
* @param input Column to sort. The column data is not modified.
* @param column_order Ascending or descending sort order
* @param null_precedence How null rows are to be ordered
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Sorted indices for the input column.
*/
template <bool stable>
std::unique_ptr<column> sorted_order(column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
vyasr marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief Comparator functor needed for single column sort.
*
* @tparam T Column element type.
*/
template <typename T>
struct simple_comparator {
Comment on lines +60 to +61
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is moved from sort_impl.cuh.

/**
 * @brief Checks whether row `lhs` must be ordered before row `rhs`.
 *
 * Const-qualified because the comparison only reads the functor's members, so it can also be
 * invoked through a const copy of the functor.
 *
 * @param lhs Row index of the left-hand element
 * @param rhs Row index of the right-hand element
 * @return true if row `lhs` is ordered before row `rhs`
 */
__device__ bool operator()(size_type lhs, size_type rhs) const
{
  if (has_nulls) {
    bool lhs_null{d_column.is_null(lhs)};
    bool rhs_null{d_column.is_null(rhs)};
    // When at least one row is null, the result is decided by the null flags alone.
    if (lhs_null || rhs_null) {
      // For a descending sort the roles of the two rows are exchanged.
      if (!ascending) thrust::swap(lhs_null, rhs_null);
      return (null_precedence == cudf::null_order::BEFORE ? !rhs_null : !lhs_null);
    }
  }
  return relational_compare(d_column.element<T>(lhs), d_column.element<T>(rhs)) ==
         (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
}
column_device_view const d_column;
bool has_nulls;
bool ascending;
null_order null_precedence{};
};

template <bool stable>
struct column_sorted_order_fn {
/**
Expand Down Expand Up @@ -63,34 +117,29 @@ struct column_sorted_order_fn {
// But this also requires making a copy of the input data.
auto temp_col = column(input, stream);
auto d_col = temp_col.mutable_view();
if (ascending) {

auto const do_sort = [&](auto const comp) {
// Compiling `thrust::*sort*` APIs is expensive.
// Thus, we should optimize that by using constexpr condition to only compile what we need.
if constexpr (stable) {
thrust::stable_sort_by_key(rmm::exec_policy(stream),
d_col.begin<T>(),
d_col.end<T>(),
indices.begin<size_type>(),
thrust::less<T>());
comp);
} else {
thrust::sort_by_key(rmm::exec_policy(stream),
d_col.begin<T>(),
d_col.end<T>(),
indices.begin<size_type>(),
thrust::less<T>());
comp);
}
};

if (ascending) {
do_sort(thrust::less<T>{});
} else {
if constexpr (stable) {
thrust::stable_sort_by_key(rmm::exec_policy(stream),
d_col.begin<T>(),
d_col.end<T>(),
indices.begin<size_type>(),
thrust::greater<T>());
} else {
thrust::sort_by_key(rmm::exec_policy(stream),
d_col.begin<T>(),
d_col.end<T>(),
indices.begin<size_type>(),
thrust::greater<T>());
}
do_sort(thrust::greater<T>{});
}
}

Expand All @@ -114,6 +163,8 @@ struct column_sorted_order_fn {
{
auto keys = column_device_view::create(input, stream);
auto comp = simple_comparator<T>{*keys, input.has_nulls(), ascending, null_precedence};
// Compiling `thrust::*sort*` APIs is expensive.
// Thus, we should optimize that by using constexpr condition to only compile what we need.
if constexpr (stable) {
thrust::stable_sort(
rmm::exec_policy(stream), indices.begin<size_type>(), indices.end<size_type>(), comp);
Expand Down
98 changes: 18 additions & 80 deletions cpp/src/sort/sort_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,76 +16,21 @@

#pragma once

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <sort/sort_column_impl.cuh>
ttnghia marked this conversation as resolved.
Show resolved Hide resolved

#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <thrust/swap.h>
#include <cudf/column/column_factories.hpp>

namespace cudf {
namespace detail {

/**
 * @brief Comparator functor needed for single column sort.
 *
 * Compares two rows of `d_column`, identified by their row indices, honoring the requested sort
 * direction and null ordering.
 *
 * @tparam T Column element type.
 */
template <typename T>
struct simple_comparator {
  /**
   * @brief Checks whether row `lhs` must be ordered before row `rhs`.
   *
   * Const-qualified because the comparison only reads the functor's members, so it can also be
   * invoked through a const copy of the functor.
   *
   * @param lhs Row index of the left-hand element
   * @param rhs Row index of the right-hand element
   * @return true if row `lhs` is ordered before row `rhs`
   */
  __device__ bool operator()(size_type lhs, size_type rhs) const
  {
    if (has_nulls) {
      bool lhs_null{d_column.is_null(lhs)};
      bool rhs_null{d_column.is_null(rhs)};
      // When at least one row is null, the result is decided by the null flags alone.
      if (lhs_null || rhs_null) {
        // For a descending sort the roles of the two rows are exchanged.
        if (!ascending) thrust::swap(lhs_null, rhs_null);
        return (null_precedence == cudf::null_order::BEFORE ? !rhs_null : !lhs_null);
      }
    }
    return relational_compare(d_column.element<T>(lhs), d_column.element<T>(rhs)) ==
           (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
  }
  column_device_view const d_column;  ///< Column whose rows are compared
  bool has_nulls;                     ///< Whether the null checks are performed at all
  bool ascending;                     ///< true for ascending order, false for descending
  null_order null_precedence{};       ///< Null ordering relative to non-null rows
};

/**
* @brief Sort indices of a single column.
*
* @tparam stable Whether to use stable sort
* @param input Column to sort. The column data is not modified.
* @param column_order Ascending or descending sort order
* @param null_precedence How null rows are to be ordered
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Sorted indices for the input column.
*/
template <bool stable>
std::unique_ptr<column> sorted_order(column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc
* sorted_order(table_view&,std::vector<order>,std::vector<null_order>,rmm::mr::device_memory_resource*)
*
* @tparam stable Whether to use stable sort
* @param stream CUDA stream used for device memory operations and kernel launches
*/
template <bool stable = false>
template <bool stable>
std::unique_ptr<column> sorted_order(table_view input,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
Expand All @@ -111,8 +56,7 @@ std::unique_ptr<column> sorted_order(table_view input,
auto const single_col = input.column(0);
auto const col_order = column_order.empty() ? order::ASCENDING : column_order.front();
auto const null_prec = null_precedence.empty() ? null_order::BEFORE : null_precedence.front();
return stable ? sorted_order<true>(single_col, col_order, null_prec, stream, mr)
: sorted_order<false>(single_col, col_order, null_prec, stream, mr);
return sorted_order<stable>(single_col, col_order, null_prec, stream, mr);
}

std::unique_ptr<column> sorted_indices = cudf::make_numeric_column(
Expand All @@ -123,11 +67,10 @@ std::unique_ptr<column> sorted_order(table_view input,
mutable_indices_view.end<size_type>(),
0);

auto comp =
experimental::row::lexicographic::self_comparator(input, column_order, null_precedence, stream);
if (cudf::detail::has_nested_columns(input)) {
auto comparator = comp.less<true>(nullate::DYNAMIC{has_nested_nulls(input)});
if (stable) {
auto const do_sort = [&](auto const comparator) {
// Compiling `thrust::*sort*` APIs is expensive.
// Thus, we should optimize that by using constexpr condition to only compile what we need.
if constexpr (stable) {
thrust::stable_sort(rmm::exec_policy(stream),
mutable_indices_view.begin<size_type>(),
mutable_indices_view.end<size_type>(),
Expand All @@ -138,22 +81,17 @@ std::unique_ptr<column> sorted_order(table_view input,
mutable_indices_view.end<size_type>(),
comparator);
}
};

auto const comp = cudf::experimental::row::lexicographic::self_comparator(
input, column_order, null_precedence, stream);
if (cudf::detail::has_nested_columns(input)) {
auto const comparator = comp.less<true>(nullate::DYNAMIC{has_nested_nulls(input)});
do_sort(comparator);
} else {
auto comparator = comp.less<false>(nullate::DYNAMIC{has_nested_nulls(input)});
if (stable) {
thrust::stable_sort(rmm::exec_policy(stream),
mutable_indices_view.begin<size_type>(),
mutable_indices_view.end<size_type>(),
comparator);
} else {
thrust::sort(rmm::exec_policy(stream),
mutable_indices_view.begin<size_type>(),
mutable_indices_view.end<size_type>(),
comparator);
}
auto const comparator = comp.less<false>(nullate::DYNAMIC{has_nested_nulls(input)});
do_sort(comparator);
}
// protection for temporary d_column_order and d_null_precedence
stream.synchronize();

return sorted_indices;
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/sort/stable_sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,9 +14,10 @@
* limitations under the License.
*/

#include "sort_impl.cuh"
#include <sort/sort_impl.cuh>
ttnghia marked this conversation as resolved.
Show resolved Hide resolved

#include <cudf/column/column.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/sorting.hpp>
Expand Down