Skip to content

Commit

Permalink
Add Segmented sort (#7122)
Browse files Browse the repository at this point in the history
addresses part of #6541 Segment sort of lists

- [x] lists_column_view segmented_sort
- [x] numerical types (cub segmented sort limitation)
- [x] sort_lists(table_view)
- [x] unit tests

closes  #4603 Segmented sort
- [x] segmented_sort
- [x] unit tests.

Authors:
  - Karthikeyan (@karthikeyann)

Approvers:
  - AJ Schmidt (@ajschmidt8)
  - Keith Kraus (@kkraus14)
  - Jake Hemstad (@jrhemstad)
  - Conor Hoekstra (@codereport)

URL: #7122
  • Loading branch information
karthikeyann authored Feb 4, 2021
1 parent fd38b4c commit 369ec98
Show file tree
Hide file tree
Showing 11 changed files with 1,042 additions and 13 deletions.
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
# Copyright (c) 2018-2021, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -128,6 +128,7 @@ test:
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/lists/sorting.hpp
- test -f $PREFIX/include/cudf/merge.hpp
- test -f $PREFIX/include/cudf/null_mask.hpp
- test -f $PREFIX/include/cudf/partitioning.hpp
Expand Down
29 changes: 28 additions & 1 deletion cpp/include/cudf/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,5 +63,32 @@ std::unique_ptr<table> sort_by_key(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sorted_order
*
* Overload in the `detail` namespace that additionally accepts the CUDA stream to use.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> segmented_sorted_order(
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sort_by_key
*
* Overload in the `detail` namespace that additionally accepts the CUDA stream to use.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> segmented_sort_by_key(
table_view const& values,
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace cudf
25 changes: 24 additions & 1 deletion cpp/include/cudf/lists/list_device_view.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,4 +190,27 @@ class list_device_view {
};
};

/**
 * @brief Functor returning the number of elements in the list at a given row of a lists column.
 *
 * Null rows report a size of zero. For non-null rows the size is the difference between two
 * adjacent entries of the offsets child column, indexed relative to the parent column's offset.
 */
struct list_size_functor {
  column_device_view const d_column;

  /**
   * @brief Wraps a device view of a lists column.
   *
   * The column type is validated with `release_assert` when compiled for device code and with
   * `CUDF_EXPECTS` when compiled for host code.
   *
   * @param d_col Device view of the column; its type id must be `type_id::LIST`
   */
  CUDA_HOST_DEVICE_CALLABLE list_size_functor(column_device_view const& d_col) : d_column(d_col)
  {
#if defined(__CUDA_ARCH__)
    release_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported");
#else
    CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported");
#endif
  }

  /**
   * @brief Returns the size of the list stored at row `idx`.
   *
   * Const-qualified because the call only reads `d_column`; this lets the functor be invoked
   * through a const object or reference as well.
   *
   * @param idx Row index into the lists column
   * @return 0 if the row is null, otherwise `offsets[idx + 1] - offsets[idx]`
   */
  CUDA_DEVICE_CALLABLE size_type operator()(size_type idx) const
  {
    if (d_column.is_null(idx)) return size_type{0};
    // Shift the offsets pointer by the parent's offset so `idx` is relative to this view.
    auto d_offsets =
      d_column.child(lists_column_view::offsets_column_index).data<size_type>() + d_column.offset();
    return d_offsets[idx + 1] - d_offsets[idx];
  }
};

} // namespace cudf
59 changes: 59 additions & 0 deletions cpp/include/cudf/lists/sorting.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
/**
* @addtogroup lists_sort
* @{
* @file
*/

/**
* @brief Segmented sort of the elements within a list in each row of a list column.
*
* Only a `source_column` with depth 1 is supported.
*
* @code{.pseudo}
* source_column : [{4, 2, 3, 1}, {1, 2, NULL, 4}, {-10, 10, 0}]
*
* Ascending, Null After : [{1, 2, 3, 4}, {1, 2, 4, NULL}, {-10, 0, 10}]
* Ascending, Null Before : [{1, 2, 3, 4}, {NULL, 1, 2, 4}, {-10, 0, 10}]
* Descending, Null After : [{4, 3, 2, 1}, {NULL, 4, 2, 1}, {10, 0, -10}]
* Descending, Null Before : [{4, 3, 2, 1}, {4, 2, 1, NULL}, {10, 0, -10}]
* @endcode
*
* @param source_column View of the list column of numeric types to sort
* @param column_order The desired sort order
* @param null_precedence The desired order of null compared to other elements in the list
* @param mr Device memory resource to allocate any returned objects
* @return List column with the elements in each list sorted.
*
*/
std::unique_ptr<column> sort_lists(
lists_column_view const& source_column,
order column_order,
null_order null_precedence,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace lists
} // namespace cudf
61 changes: 59 additions & 2 deletions cpp/include/cudf/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -128,7 +128,7 @@ std::unique_ptr<table> sort(
* @param values The table to reorder
* @param keys The table that determines the ordering
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `input.num_columns()` or empty. If empty, all columns are sorted in
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
Expand Down Expand Up @@ -184,5 +184,62 @@ std::unique_ptr<column> rank(
bool percentage,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns sorted order after sorting each segment in the table.
*
* If `segment_offsets` contains values larger than the number of rows, behaviour is undefined.
*
* @throws cudf::logic_error if `segment_offsets` is not `size_type` column.
*
* @param keys The table that determines the ordering of elements in each segment
* @param segment_offsets The column of `size_type` type containing start offset index for each
* contiguous segment.
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
* `keys.num_columns()` or empty. If empty, all columns will be sorted with
* `null_order::BEFORE`.
* @param mr Device memory resource to allocate any returned objects
* @return Sorted order of the segment-sorted table.
*
*/
std::unique_ptr<column> segmented_sorted_order(
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a lexicographic segmented sort of a table.
*
* If `segment_offsets` contains values larger than the number of rows, behaviour is undefined.
*
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
* @throws cudf::logic_error if `segment_offsets` is not `size_type` column.
*
* @param values The table to reorder
* @param keys The table that determines the ordering of elements in each segment
* @param segment_offsets The column of `size_type` type containing start offset index for each
* contiguous segment.
* @param column_order The desired order for each column in `keys`. Size must be
* equal to `keys.num_columns()` or empty. If empty, all columns are sorted in
* ascending order.
* @param null_precedence The desired order of a null element compared to other
* elements for each column in `keys`. Size must be equal to
* `keys.num_columns()` or empty. If empty, all columns will be sorted with
* `null_order::BEFORE`.
* @param mr Device memory resource to allocate any returned objects
* @return Table with the elements in each segment sorted.
*
*/
std::unique_ptr<table> segmented_sort_by_key(
table_view const& values,
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
9 changes: 2 additions & 7 deletions cpp/src/lists/count_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/count_elements.hpp>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/utilities/error.hpp>

Expand Down Expand Up @@ -60,13 +61,7 @@ std::unique_ptr<column> count_elements(lists_column_view const& input,
thrust::make_counting_iterator<cudf::size_type>(0),
thrust::make_counting_iterator<cudf::size_type>(input.size()),
output->mutable_view().begin<size_type>(),
[d_column] __device__(size_type idx) {
if (d_column.is_null(idx)) return size_type{0};
auto d_offsets =
d_column.child(lists_column_view::offsets_column_index).data<size_type>() +
d_column.offset();
return d_offsets[idx + 1] - d_offsets[idx];
});
list_size_functor{d_column});

output->set_null_count(input.null_count()); // reset null count
return output;
Expand Down
Loading

0 comments on commit 369ec98

Please sign in to comment.