Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.04' into cuco-set-gr…
Browse files Browse the repository at this point in the history
…oupby
  • Loading branch information
PointKernel committed Feb 29, 2024
2 parents 8bade44 + efc4edf commit 6e54cd9
Show file tree
Hide file tree
Showing 16 changed files with 310 additions and 161 deletions.
3 changes: 2 additions & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

set -euo pipefail

export RAPIDS_VERSION_NUMBER="$(rapids-generate-version)"

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

Expand All @@ -27,7 +29,6 @@ rapids-mamba-retry install \
--channel "${PYTHON_CHANNEL}" \
libcudf cudf dask-cudf

export RAPIDS_VERSION_NUMBER="24.04"
export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-logger "Build CPP docs"
Expand Down
1 change: 0 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
done
sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh

# Java files
NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT"
Expand Down
4 changes: 1 addition & 3 deletions ci/test_cpp_memcheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ source ./ci/test_cpp_common.sh

rapids-logger "Memcheck gtests with rmm_mode=cuda"

./ci/run_cudf_memcheck_ctests.sh \
--gtest_output=xml:"${RAPIDS_TESTS_DIR}${test_name}.xml" \
&& EXITCODE=$? || EXITCODE=$?;
./ci/run_cudf_memcheck_ctests.sh && EXITCODE=$? || EXITCODE=$?;

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
13 changes: 12 additions & 1 deletion cpp/include/cudf/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -150,5 +150,16 @@ std::unique_ptr<table> sort(table_view const& values,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::stable_sort
*
* Unlike the public `cudf::stable_sort` declaration, this overload declares no default
* arguments: `column_order`, `null_precedence`, `stream` and `mr` must all be passed explicitly.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> stable_sort(table_view const& values,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace cudf
35 changes: 14 additions & 21 deletions cpp/include/cudf/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -115,6 +115,18 @@ std::unique_ptr<table> sort(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a stable lexicographic sort of the rows of a table
*
* Only `input` is required. `column_order` and `null_precedence` default to empty vectors,
* `stream` defaults to `cudf::get_default_stream()`, and `mr` defaults to
* `rmm::mr::get_current_device_resource()`.
*
* @copydoc cudf::sort
*/
std::unique_ptr<table> stable_sort(
table_view const& input,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a key-value sort.
*
Expand Down Expand Up @@ -148,26 +160,7 @@ std::unique_ptr<table> sort_by_key(
/**
* @brief Performs a key-value stable sort.
*
* Creates a new table that reorders the rows of `values` according to the
* lexicographic ordering of the rows of `keys`.
*
* The order of equivalent elements is guaranteed to be preserved.
*
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
*
* @param values The table to reorder
* @param keys The table that determines the ordering
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
* `keys.num_columns()` or empty. If empty, all columns will be sorted with
* `null_order::BEFORE`.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
* @return The reordering of `values` determined by the lexicographic order of
* the rows of `keys`.
* @copydoc cudf::sort_by_key
*/
std::unique_ptr<table> stable_sort_by_key(
table_view const& values,
Expand Down
17 changes: 7 additions & 10 deletions cpp/src/join/distinct_hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -205,27 +205,24 @@ CUDF_KERNEL void distinct_join_probe_kernel(Iter iter,
cudf::size_type buffer_size = 0;

while (idx - block.thread_rank() < n) { // the whole thread block falls into the same iteration
cudf::size_type thread_count{0};
cudf::size_type build_idx{0};
if (idx < n) {
auto const found = hash_table.find(*(iter + idx));
thread_count = found != hash_table.end();
build_idx = static_cast<cudf::size_type>(found->second);
}
auto const found = idx < n ? hash_table.find(*(iter + idx)) : hash_table.end();
auto const has_match = found != hash_table.end();

// Use a whole-block scan to calculate the output location
cudf::size_type offset;
cudf::size_type block_count;
block_scan(block_scan_temp_storage).ExclusiveSum(thread_count, offset, block_count);
block_scan(block_scan_temp_storage)
.ExclusiveSum(static_cast<cudf::size_type>(has_match), offset, block_count);

if (buffer_size + block_count > buffer_capacity) {
flush_buffer(block, buffer_size, buffer, counter, build_indices, probe_indices);
block.sync();
buffer_size = 0;
}

if (thread_count == 1) {
buffer[buffer_size + offset] = cuco::pair{build_idx, static_cast<cudf::size_type>(idx)};
if (has_match) {
buffer[buffer_size + offset] = cuco::pair{static_cast<cudf::size_type>(found->second),
static_cast<cudf::size_type>(idx)};
}
buffer_size += block_count;
block.sync();
Expand Down
101 changes: 101 additions & 0 deletions cpp/src/sort/common_sort_impl.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sort.h>

namespace cudf {
namespace detail {

/**
 * @brief Selects which sorting algorithm variant to run: stable or unstable.
 *
 * The underlying type is `bool`, so the enum holds exactly these two enumerators.
 */
enum class sort_method : bool {
  STABLE,   ///< Request a stable sort
  UNSTABLE  ///< Request an unstable sort
};

/**
 * @brief Functor that sorts a single eligible column in place (the fast path)
 *
 * Use one of the `is_usable` overloads first to find out whether the fast path applies.
 *
 * @tparam method Whether the stable or the unstable sort algorithm is used.
 */
template <sort_method method>
struct inplace_column_sort_fn {
  /**
   * @brief Report whether the in-place fast path can be used for a column
   *
   * The column qualifies when it has no nulls and its type is fixed-width but not
   * floating-point.
   *
   * @param column Column to inspect
   * @return true if the fast-path sort is available, false otherwise.
   */
  static bool is_usable(column_view const& column)
  {
    if (column.has_nulls()) { return false; }
    if (!cudf::is_fixed_width(column.type())) { return false; }
    return !cudf::is_floating_point(column.type());
  }

  /**
   * @brief Report whether the in-place fast path can be used for a table
   *
   * The table qualifies when it holds exactly one column and that column qualifies.
   *
   * @param table Table to inspect
   * @return true if the fast-path sort is available, false otherwise.
   */
  static bool is_usable(table_view const& table)
  {
    if (table.num_columns() != 1) { return false; }
    return is_usable(table.column(0));
  }

  /**
   * @brief Sort the elements of a column in place
   *
   * Precondition: `is_usable(col)` returned true.
   *
   * @tparam T Element type of the column.
   * @param col Column whose elements are reordered in place.
   * @param order Ascending or descending sort order.
   * @param stream CUDA stream used for device memory operations and kernel launches
   */
  template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
  void operator()(mutable_column_view& col, order order, rmm::cuda_stream_view stream) const
  {
    // The algorithm (stable vs. unstable) is fixed at compile time by `method`;
    // only the comparator depends on the runtime `order` argument.
    auto const sort_with = [&](auto const comparator) {
      if constexpr (method == sort_method::STABLE) {
        thrust::stable_sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), comparator);
      } else {
        thrust::sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), comparator);
      }
    };

    // Anything other than ASCENDING is sorted with the "greater" comparator.
    if (order != order::ASCENDING) {
      sort_with(thrust::greater<T>());
      return;
    }
    sort_with(thrust::less<T>());
  }

  /**
   * @brief Overload selected for element types that are not fixed-width; always fails
   */
  template <typename T, std::enable_if_t<!cudf::is_fixed_width<T>()>* = nullptr>
  void operator()(mutable_column_view&, order, rmm::cuda_stream_view) const
  {
    CUDF_FAIL("Column type must be relationally comparable and fixed-width");
  }
};

} // namespace detail
} // namespace cudf
11 changes: 5 additions & 6 deletions cpp/src/sort/segmented_sort_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,10 @@
* limitations under the License.
*/

#pragma once

#include "common_sort_impl.cuh"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/gather.hpp>
Expand All @@ -29,11 +33,6 @@
namespace cudf {
namespace detail {

/**
* @brief The enum specifying which sorting method to use (stable or unstable).
*/
enum class sort_method { STABLE, UNSTABLE };

/**
* @brief Functor performs faster segmented sort on eligible columns
*/
Expand Down
40 changes: 9 additions & 31 deletions cpp/src/sort/sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include "common_sort_impl.cuh"
#include "sort_impl.cuh"

#include <cudf/column/column.hpp>
Expand All @@ -37,7 +38,7 @@ std::unique_ptr<column> sorted_order(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return sorted_order<false>(input, column_order, null_precedence, stream, mr);
return sorted_order<sort_method::UNSTABLE>(input, column_order, null_precedence, stream, mr);
}

std::unique_ptr<table> sort_by_key(table_view const& values,
Expand All @@ -61,47 +62,24 @@ std::unique_ptr<table> sort_by_key(table_view const& values,
mr);
}

struct inplace_column_sort_fn {
template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
void operator()(mutable_column_view& col, bool ascending, rmm::cuda_stream_view stream) const
{
CUDF_EXPECTS(!col.has_nulls(), "Nulls not supported for in-place sort");
if (ascending) {
thrust::sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), thrust::less<T>());
} else {
thrust::sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), thrust::greater<T>());
}
}

template <typename T, std::enable_if_t<!cudf::is_fixed_width<T>()>* = nullptr>
void operator()(mutable_column_view&, bool, rmm::cuda_stream_view) const
{
CUDF_FAIL("Column type must be relationally comparable and fixed-width");
}
};

std::unique_ptr<table> sort(table_view const& input,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
// fast-path sort conditions: single, non-floating-point, fixed-width column with no nulls
if (input.num_columns() == 1 && !input.column(0).has_nulls() &&
cudf::is_fixed_width(input.column(0).type()) &&
!cudf::is_floating_point(input.column(0).type())) {
auto output = std::make_unique<column>(input.column(0), stream, mr);
auto view = output->mutable_view();
bool ascending = (column_order.empty() ? true : column_order.front() == order::ASCENDING);
if (inplace_column_sort_fn<sort_method::UNSTABLE>::is_usable(input)) {
auto output = std::make_unique<column>(input.column(0), stream, mr);
auto view = output->mutable_view();
auto order = (column_order.empty() ? order::ASCENDING : column_order.front());
cudf::type_dispatcher<dispatch_storage_type>(
output->type(), inplace_column_sort_fn{}, view, ascending, stream);
output->type(), inplace_column_sort_fn<sort_method::UNSTABLE>{}, view, order, stream);
std::vector<std::unique_ptr<column>> columns;
columns.emplace_back(std::move(output));
return std::make_unique<table>(std::move(columns));
}
return detail::sort_by_key(
input, input, column_order, null_precedence, cudf::get_default_stream(), mr);
return detail::sort_by_key(input, input, column_order, null_precedence, stream, mr);
}

} // namespace detail
Expand Down
15 changes: 8 additions & 7 deletions cpp/src/sort/sort_column.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include "common_sort_impl.cuh"
#include "sort_column_impl.cuh"

#include <cudf/column/column_factories.hpp>
Expand All @@ -30,19 +31,19 @@ namespace detail {
* sorted_order(column_view&,order,null_order,rmm::cuda_stream_view,rmm::mr::device_memory_resource*)
*/
template <>
std::unique_ptr<column> sorted_order<false>(column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<column> sorted_order<sort_method::UNSTABLE>(column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto sorted_indices = cudf::make_numeric_column(
data_type(type_to_id<size_type>()), input.size(), mask_state::UNALLOCATED, stream, mr);
mutable_column_view indices_view = sorted_indices->mutable_view();
thrust::sequence(
rmm::exec_policy(stream), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);
cudf::type_dispatcher<dispatch_storage_type>(input.type(),
column_sorted_order_fn<false>{},
column_sorted_order_fn<sort_method::UNSTABLE>{},
input,
indices_view,
column_order == order::ASCENDING,
Expand Down
Loading

0 comments on commit 6e54cd9

Please sign in to comment.