From b2464b1174b14859106ec429d763193ab78d9efe Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 12 Jan 2021 16:33:41 +0530 Subject: [PATCH 01/32] add segmented sort for lists_column_view --- cpp/include/cudf/lists/sorting.hpp | 59 +++++++ cpp/src/lists/segmented_sort.cu | 250 +++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 cpp/include/cudf/lists/sorting.hpp create mode 100644 cpp/src/lists/segmented_sort.cu diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp new file mode 100644 index 00000000000..614d8966f2c --- /dev/null +++ b/cpp/include/cudf/lists/sorting.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace cudf { +namespace lists { +/** + * @addtogroup lists_sort + * @{ + * @file + */ + +/** + * @brief Segmented sort of the elements within a list in each row of a list column. + * + * `source_column` with depth 1 is only supported. This uses segmented radix sort. 
+ * + * * @code{.pseudo} + * source_column : [{4, 2, 3, 1}, {1, 2, NULL, 4}, {-10, 10, 0}] + * + * Ascending, Null After : [{1, 2, 3, 4}, {1, 2, 4, NULL}, {-10, 0, 10}] + * Ascending, Null Before : [{1, 2, 3, 4}, {NULL, 1, 2, 4}, {-10, 0, 10}] + * Descending, Null After : [{4, 3, 2, 1}, {4, 2, 1, NULL}, {10, 0, -10}] + * Descending, Null Before : [{4, 3, 2, 1}, {NULL, 4, 2, 1}, {10, 0, -10}] + * @endcode + * + * @param source_column View into the list column of numeric types to gather from + * @param column_order The desired sort order + * @param null_precedence The desired order of null compared to other elements in the list + * @param mr Device memory resource to allocate any returned objects + * @return list column with elements in each list sorted. + * + */ +std::unique_ptr segmented_sort( + lists_column_view const& source_column, + order column_order, + null_order null_precedence, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** @} */ // end of group +} // namespace lists +} // namespace cudf diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu new file mode 100644 index 00000000000..089471ef891 --- /dev/null +++ b/cpp/src/lists/segmented_sort.cu @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { + +struct SortPairs { + template + void SortPairsAscending(KeyT const* keys_in, + KeyT* keys_out, + ValueT const* values_in, + ValueT* values_out, + int num_items, + int num_segments, + OffsetIteratorT begin_offsets, + OffsetIteratorT end_offsets, + rmm::cuda_stream_view stream) + { + rmm::device_buffer d_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_in, + keys_out, + values_in, + values_out, + num_items, + num_segments, + begin_offsets, + end_offsets, + 0, + sizeof(KeyT) * 8, + stream.value()); + d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_in, + keys_out, + values_in, + values_out, + num_items, + num_segments, + begin_offsets, + end_offsets, + 0, + sizeof(KeyT) * 8, + stream.value()); + } + + template + void SortPairsDescending(KeyT const* keys_in, + KeyT* keys_out, + ValueT const* values_in, + ValueT* values_out, + int num_items, + int num_segments, + OffsetIteratorT begin_offsets, + OffsetIteratorT end_offsets, + rmm::cuda_stream_view stream) + { + rmm::device_buffer d_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage.data(), + temp_storage_bytes, + keys_in, + keys_out, + values_in, + values_out, + num_items, + num_segments, + begin_offsets, + end_offsets, + 0, + sizeof(KeyT) * 8, + stream.value()); + d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage.data(), + temp_storage_bytes, + keys_in, + keys_out, + values_in, + values_out, + num_items, + num_segments, + 
begin_offsets, + end_offsets, + 0, + sizeof(KeyT) * 8, + stream.value()); + } + template + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& child, + column_view const& offsets, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + CUDF_FAIL("segmented sort is not supported for non-numeric list types"); + } + template + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& child, + column_view const& offsets, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto output = + cudf::detail::allocate_like(child, child.size(), mask_allocation_policy::NEVER, stream, mr); + mutable_column_view mutable_output_view = output->mutable_view(); + + auto keys = [&]() { + if (child.nullable()) { + rmm::device_uvector keys(child.size(), stream); + auto null_replace_T = + column_order == order::ASCENDING + ? (null_precedence == null_order::AFTER ? std::numeric_limits::max() + : std::numeric_limits::min()) + : (null_precedence == null_order::BEFORE ? std::numeric_limits::max() + : std::numeric_limits::min()); + auto device_child = column_device_view::create(child, stream); + auto keys_in = + cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); + thrust::copy_n(rmm::exec_policy(stream), keys_in, child.size(), keys.begin()); + return keys; + } + return rmm::device_uvector{0, stream}; + }(); + + std::unique_ptr sorted_indices = cudf::make_numeric_column( + data_type(type_to_id()), child.size(), mask_state::UNALLOCATED, stream, mr); + mutable_column_view mutable_indices_view = sorted_indices->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + 0); + + if (column_order == order::ASCENDING) + SortPairsAscending(child.nullable() ? 
keys.data() : child.begin(), + mutable_output_view.begin(), + mutable_indices_view.begin(), + mutable_indices_view.begin(), + child.size(), + offsets.size() - 1, + offsets.begin(), + offsets.begin() + 1, + stream); + else + SortPairsDescending(child.nullable() ? keys.data() : child.begin(), + mutable_output_view.begin(), + mutable_indices_view.begin(), + mutable_indices_view.begin(), + child.size(), + offsets.size() - 1, + offsets.begin(), + offsets.begin() + 1, + stream); + std::vector> output_cols; + output_cols.push_back(std::move(output)); + // rearrange the null_mask. + cudf::detail::gather_bitmask(cudf::table_view{{child}}, + mutable_indices_view.begin(), + output_cols, + cudf::detail::gather_bitmask_op::DONT_CHECK, + stream, + mr); + return std::move(output_cols.front()); + } +}; + +std::unique_ptr segmented_sort(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (input.size() == 0) return {}; + + auto output_child = type_dispatcher(input.child().type(), + SortPairs{}, + input.get_sliced_child(stream), + input.offsets(), + column_order, + null_precedence, + stream, + mr); + + // Copy list offsets. 
+ auto output_offset = std::make_unique(input.offsets(), stream, mr); + auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr); + + // Assemble list column & return + return make_lists_column(input.size(), + std::move(output_offset), + std::move(output_child), + input.null_count(), + std::move(null_mask)); +} +} // namespace detail + +std::unique_ptr segmented_sort(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_sort(input, column_order, null_precedence, rmm::cuda_stream_default, mr); +} + +} // namespace lists +} // namespace cudf From e7e2e4fa24824c64c64d143a382f07a233d11fb8 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 12 Jan 2021 16:34:17 +0530 Subject: [PATCH 02/32] add unit test for segmented sort for lists_column_view --- cpp/tests/CMakeLists.txt | 3 +- cpp/tests/lists/segmented_sort_tests.cpp | 109 +++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 cpp/tests/lists/segmented_sort_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 0d958f47b6b..4c20e9084d7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -653,7 +653,8 @@ ConfigureTest(AST_TEST "${AST_TEST_SRC}") # - lists tests ---------------------------------------------------------------------------------- set(LISTS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/lists/extract_tests.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/lists/extract_tests.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/lists/segmented_sort_tests.cpp") ConfigureTest(LISTS_TEST "${LISTS_TEST_SRC}") diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/segmented_sort_tests.cpp new file mode 100644 index 00000000000..016311bc489 --- /dev/null +++ b/cpp/tests/lists/segmented_sort_tests.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template +using LCW = cudf::test::lists_column_wrapper; +using cudf::lists_column_view; +using cudf::lists::segmented_sort; + +namespace cudf { +namespace test { + +template +struct SegmentedSort : public BaseFixture { +}; + +// using NumericTypesNotBool = Concat; +TYPED_TEST_CASE(SegmentedSort, NumericTypes); + +TYPED_TEST(SegmentedSort, NoNull) +{ + using T = TypeParam; + + // List + LCW list{{3, 2, 1, 4}, {5}, {10, 8, 9}, {6, 7}}; + + // Ascending + // LCW order{{2, 1, 0, 3}, {0}, {1, 2, 0}, {0, 1}}; + LCW expected{{1, 2, 3, 4}, {5}, {8, 9, 10}, {6, 7}}; + auto results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + // Descending + // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; + LCW expected2{{4, 3, 2, 1}, {5}, {10, 9, 8}, {7, 6}}; + results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); + + results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); +} + +TYPED_TEST(SegmentedSort, Null) +{ + using T = TypeParam; + if (std::is_same::value) return; + std::vector valids_o{1, 1, 0, 1}; + std::vector valids_a{1, 1, 1, 0}; + std::vector valids_b{0, 1, 1, 1}; + + // List + LCW list{{{3, 2, 4, 1}, valids_o.begin()}, {5}, {10, 8, 9}, {6, 7}}; + // LCW order{{2, 1, 3, 0}, {0}, {1, 2, 0}, {0, 1}}; + LCW expected1{{{1, 2, 3, 4}, valids_a.begin()}, {5}, {8, 9, 10}, {6, 7}}; + LCW expected2{{{4, 1, 2, 3}, valids_b.begin()}, {5}, {8, 9, 10}, {6, 7}}; + auto results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected1); + + results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); + + // Descending + // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; + LCW expected3{{{3, 2, 1, 4}, valids_a.begin()}, {5}, {10, 9, 8}, {7, 6}}; + LCW expected4{{{4, 3, 2, 1}, valids_b.begin()}, {5}, {10, 9, 8}, {7, 6}}; + results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); + + results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected4); +} + +} // namespace test +} // namespace cudf From f16f9243af2f2040a69b2b6438d6196980a21a4d Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 25 Jan 2021 14:04:29 +0530 Subject: [PATCH 03/32] add segmented_sort(table_view) --- cpp/include/cudf/detail/sorting.hpp | 23 ++++ cpp/include/cudf/sorting.hpp | 6 + cpp/src/sort/segmented_sort.cu | 154 +++++++++++++++++++++++ cpp/tests/CMakeLists.txt | 1 + cpp/tests/lists/segmented_sort_tests.cpp | 1 - 5 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 cpp/src/sort/segmented_sort.cu diff --git 
a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 0ac20ed3c94..16301eaae90 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -63,5 +63,28 @@ std::unique_ptr sort_by_key( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_sorted_order( + table_view const& values, + table_view const& keys, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr
segmented_sort_by_key( + table_view const& values, + table_view const& keys, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr
segmented_sort( + table_view input, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 1116b49c892..0b15c1530c1 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -184,5 +184,11 @@ std::unique_ptr rank( bool percentage, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
segmented_sort( + table_view input, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu new file mode 100644 index 00000000000..5e2c306c66b --- /dev/null +++ b/cpp/src/sort/segmented_sort.cu @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cudf { +namespace detail { + +// returns row index for each element of the row in list column. 
+rmm::device_uvector get_list_segment_indices(lists_column_view const& lc, + rmm::cuda_stream_view stream) +{ + auto sliced_child = lc.get_sliced_child(stream); + rmm::device_uvector segment_ids(sliced_child.size(), stream); + + auto offsets = lc.offsets().begin() + lc.offset(); + auto offsets_minus_one = thrust::make_transform_iterator( + offsets, [offsets] __device__(auto i) { return i - offsets[0] - 1; }); + auto counting_iter = thrust::make_counting_iterator(0); + thrust::lower_bound(rmm::exec_policy(stream), + offsets_minus_one + 1, + offsets_minus_one + lc.size() + 1, + counting_iter, + counting_iter + segment_ids.size(), + segment_ids.begin()); + return std::move(segment_ids); +} + +std::unique_ptr segmented_sorted_order(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(std::all_of(keys.begin(), + keys.end(), + [](column_view const& col) { return col.type().id() == type_id::LIST; }), + "segmented_sort only supports lists columns"); + CUDF_EXPECTS(std::all_of(values.begin(), + values.end(), + [](column_view const& col) { return col.type().id() == type_id::LIST; }), + "segmented_sort only supports lists columns"); + // TODO check if all list sizes are equal. OR all offsets are equal (may be wrong). + + auto segment_ids = get_list_segment_indices(lists_column_view{keys.column(0)}, stream); + // insert segment id before all child columns. 
+ std::vector child_key_columns(keys.num_columns() + 1); + child_key_columns[0] = + column_view(data_type(type_to_id()), segment_ids.size(), segment_ids.data()); + std::transform(keys.begin(), keys.end(), child_key_columns.begin() + 1, [stream](auto col) { + return lists_column_view(col).get_sliced_child(stream); + }); + auto child_keys = table_view(child_key_columns); + + std::vector child_column_order(column_order); + if (not column_order.empty()) + child_column_order.insert(child_column_order.begin(), order::ASCENDING); + std::vector child_null_precedence(null_precedence); + if (not null_precedence.empty()) + child_null_precedence.insert(child_null_precedence.begin(), null_order::AFTER); + + // create table_view of child columns + return detail::sorted_order(child_keys, child_column_order, child_null_precedence, stream, mr); +} + +std::unique_ptr
segmented_sort_by_key(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto sorted_order = segmented_sorted_order(values, keys, column_order, null_precedence, stream); + std::vector child_columns(values.num_columns()); + std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { + return lists_column_view(col).get_sliced_child(stream); + }); + // return std::unique_ptr
(new table{table_view{std::vector{*sorted_order}}}); + // TODO build the list columns from returned table! and packit into table! + auto child_result = detail::gather(table_view{child_columns}, + sorted_order->view(), + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + std::vector> list_columns; + std::transform( // thrust::host, + values.begin(), + values.end(), + std::make_move_iterator(child_result.begin()), + std::back_inserter(list_columns), + [&stream, &mr](auto& input_list, auto&& sorted_child) { + auto output_offset = + std::make_unique(lists_column_view(input_list).offsets(), stream, mr); + auto null_mask = cudf::detail::copy_bitmask(input_list, stream, mr); + // Assemble list column & return + return make_lists_column(input_list.size(), + std::move(output_offset), + std::move(sorted_child), + input_list.null_count(), + std::move(null_mask)); + }); + return std::make_unique
(std::move(list_columns)); + // TODO write tests and verify */ +} +} // namespace detail + +std::unique_ptr
segmented_sort(table_view input, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_sort_by_key( + input, input, column_order, null_precedence, rmm::cuda_stream_default, mr); +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4c20e9084d7..f35ca4eae17 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -363,6 +363,7 @@ ConfigureTest(JSON_TEST "${JSON_TEST_SRC}") # - sort tests ------------------------------------------------------------------------------------ set(SORT_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/sort/segmented_sort_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/rank_test.cpp") diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/segmented_sort_tests.cpp index 016311bc489..6b9cda03ada 100644 --- a/cpp/tests/lists/segmented_sort_tests.cpp +++ b/cpp/tests/lists/segmented_sort_tests.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include From 44ff02a94df9116dd7b51947bcd0f519230ba75b Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 25 Jan 2021 14:05:28 +0530 Subject: [PATCH 04/32] add unit test segmented_sort(table_view) all valid --- cpp/tests/sort/segmented_sort_tests.cpp | 89 +++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cpp/tests/sort/segmented_sort_tests.cpp diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp new file mode 100644 index 00000000000..c72abd1edad --- /dev/null +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template +using LCW = cudf::test::lists_column_wrapper; +using cudf::lists_column_view; +using cudf::segmented_sort; + +namespace cudf { +namespace test { + +template +struct SegmentedSort : public BaseFixture { +}; + +// using NumericTypesNotBool = Concat; +TYPED_TEST_CASE(SegmentedSort, NumericTypes); + +TYPED_TEST(SegmentedSort, NoNull) +{ + using T = TypeParam; + + // List + LCW list1{{3, 2, 1, 4, 4, 4}, {5}, {9, 8, 9}, {6, 7}}; + LCW list2{{3, 1, 2, 3, 1, 2}, {0}, {10, 9, 9}, {6, 7}}; + table_view input{{list1, list2}}; + + // Ascending + // LCW order{{2, 1, 0, 4, 5, 3}, {0}, {1, 2, 0}, {0, 1}}; + LCW expected1{{1, 2, 3, 4, 4, 4}, {5}, {8, 9, 9}, {6, 7}}; + LCW expected2{{2, 1, 3, 1, 2, 3}, {0}, {9, 9, 10}, {6, 7}}; + table_view expected_table1{{expected1, expected2}}; + auto results = segmented_sort( + input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); + + results = segmented_sort( + input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); + + // Descending + // LCW order{{3, 5, 4, 0, 1, 2}, {0}, {0, 2, 1}, {1, 0}}; + LCW expected3{{4, 4, 4, 3, 2, 1}, {5}, {9, 9, 8}, {7, 6}}; + LCW expected4{{3, 2, 1, 3, 1, 2}, {0}, {10, 9, 9}, {7, 6}}; + table_view 
expected_table2{{expected3, expected4}}; + results = segmented_sort( + input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); + + results = segmented_sort( + input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); +} + +} // namespace test +} // namespace cudf From 4d5fbbfff7f5c3d59160f6b5b5243c9a8c721a07 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 26 Jan 2021 01:14:20 +0530 Subject: [PATCH 05/32] fix interfaces --- cpp/include/cudf/detail/sorting.hpp | 3 +-- cpp/include/cudf/sorting.hpp | 2 +- cpp/src/sort/segmented_sort.cu | 15 +++++++-------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 16301eaae90..e36eb827c03 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -64,7 +64,6 @@ std::unique_ptr
sort_by_key( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr segmented_sorted_order( - table_view const& values, table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, @@ -80,7 +79,7 @@ std::unique_ptr
segmented_sort_by_key( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr
segmented_sort( - table_view input, + table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = rmm::cuda_stream_default, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 0b15c1530c1..1f8bbeb4fa6 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -185,7 +185,7 @@ std::unique_ptr rank( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr
segmented_sort( - table_view input, + table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 5e2c306c66b..ebb8ef14c1f 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -59,8 +59,7 @@ rmm::device_uvector get_list_segment_indices(lists_column_view const& return std::move(segment_ids); } -std::unique_ptr segmented_sorted_order(table_view const& values, - table_view const& keys, +std::unique_ptr segmented_sorted_order(table_view const& keys, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, @@ -70,10 +69,6 @@ std::unique_ptr segmented_sorted_order(table_view const& values, keys.end(), [](column_view const& col) { return col.type().id() == type_id::LIST; }), "segmented_sort only supports lists columns"); - CUDF_EXPECTS(std::all_of(values.begin(), - values.end(), - [](column_view const& col) { return col.type().id() == type_id::LIST; }), - "segmented_sort only supports lists columns"); // TODO check if all list sizes are equal. OR all offsets are equal (may be wrong). auto segment_ids = get_list_segment_indices(lists_column_view{keys.column(0)}, stream); @@ -104,7 +99,11 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto sorted_order = segmented_sorted_order(values, keys, column_order, null_precedence, stream); + CUDF_EXPECTS(std::all_of(values.begin(), + values.end(), + [](column_view const& col) { return col.type().id() == type_id::LIST; }), + "segmented_sort only supports lists columns"); + auto sorted_order = segmented_sorted_order(keys, column_order, null_precedence, stream); std::vector child_columns(values.num_columns()); std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); @@ -141,7 +140,7 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, } } // namespace detail -std::unique_ptr
segmented_sort(table_view input, +std::unique_ptr
segmented_sort(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, rmm::mr::device_memory_resource* mr) From edfa549c7036356d4c9b364d8092df04ec3ddf17 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 26 Jan 2021 01:15:01 +0530 Subject: [PATCH 06/32] add null list column test --- cpp/tests/sort/segmented_sort_tests.cpp | 64 +++++++++++++++++++++---- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index c72abd1edad..04ab7bb39e4 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -21,9 +21,8 @@ #include #include -#include +//#include #include -#include #include #include #include @@ -32,12 +31,8 @@ #include #include -#include - template using LCW = cudf::test::lists_column_wrapper; -using cudf::lists_column_view; -using cudf::segmented_sort; namespace cudf { namespace test { @@ -63,11 +58,11 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected1{{1, 2, 3, 4, 4, 4}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2{{2, 1, 3, 1, 2, 3}, {0}, {9, 9, 10}, {6, 7}}; table_view expected_table1{{expected1, expected2}}; - auto results = segmented_sort( + auto results = cudf::segmented_sort( input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - results = segmented_sort( + results = cudf::segmented_sort( input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); @@ -76,14 +71,63 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected3{{4, 4, 4, 3, 2, 1}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4{{3, 2, 1, 3, 1, 2}, {0}, {10, 9, 9}, {7, 6}}; table_view expected_table2{{expected3, expected4}}; - results = segmented_sort( + results = cudf::segmented_sort( input, {order::DESCENDING, order::DESCENDING}, 
{null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); - results = segmented_sort( + results = cudf::segmented_sort( input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); } +TYPED_TEST(SegmentedSort, Nulls) +{ + using T = TypeParam; + if (std::is_same::value) return; + + // List + std::vector valids1{1, 1, 1, 0, 1, 1}; + std::vector valids1a{1, 1, 1, 1, 1, 0}; + std::vector valids2{1, 1, 0}; + std::vector valids2b{1, 0, 1}; + LCW list1{{{3, 2, 1, 4, 4, 4}, valids1.begin()}, {5}, {9, 8, 9}, {6, 7}}; + LCW list2{{3, 1, 2, 2, 1, 3}, {0}, {{10, 9, 9}, valids2.begin()}, {6, 7}}; + table_view input{{list1, list2}}; + // nulls = (4-NULL, 2), (9,9-NULL) + // (8,9), (9,10), (9,N) + + // Ascending + // LCW order{{2, 1, 0, 4, 5, 3}, {0}, {1, 0, 2}, {0, 1}}; + LCW expected1a{{{1, 2, 3, 4, 4, 4}, valids1a.begin()}, {5}, {8, 9, 9}, {6, 7}}; + LCW expected2a{{2, 1, 3, 1, 3, 2}, {0}, {{9, 10, 9}, valids2.begin()}, {6, 7}}; + table_view expected_table1a{{expected1a, expected2a}}; + auto results = cudf::segmented_sort( + input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1a); + + // LCW order{{3, 2, 1, 0, 4, 5}, {0}, {2, 1, 0}, {0, 1}}; + LCW expected1b{{{4, 1, 2, 3, 4, 4}, valids1a.rbegin()}, {5}, {8, 9, 9}, {6, 7}}; + LCW expected2b{{2, 2, 1, 3, 1, 3}, {0}, {{9, 9, 10}, valids2b.begin()}, {6, 7}}; + table_view expected_table1b{{expected1b, expected2b}}; + results = cudf::segmented_sort( + input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1b); + + // Descending + LCW expected3a{{{4, 4, 4, 3, 2, 1}, valids1a.rbegin()}, {5}, {9, 9, 8}, {7, 6}}; + LCW expected4a{{2, 3, 1, 3, 1, 2}, {0}, {{9, 10, 9}, valids2.rbegin()}, {7, 6}}; + table_view 
expected_table2a{{expected3a, expected4a}}; + results = cudf::segmented_sort( + input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2a); + + LCW expected3b{{{4, 4, 3, 2, 1, 4}, valids1a.begin()}, {5}, {9, 9, 8}, {7, 6}}; + LCW expected4b{{3, 1, 3, 1, 2, 2}, {0}, {{10, 9, 9}, valids2b.begin()}, {7, 6}}; + table_view expected_table2b{{expected3b, expected4b}}; + results = cudf::segmented_sort( + input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); +} + } // namespace test } // namespace cudf From 54c0c5ec7f59a512d7a3c49274412aeae74bf9d7 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 27 Jan 2021 23:57:17 +0530 Subject: [PATCH 07/32] fix null_order example, code, tests for sort_lists segmented sort --- cpp/include/cudf/lists/sorting.hpp | 8 ++++---- cpp/src/lists/segmented_sort.cu | 8 ++------ cpp/tests/lists/segmented_sort_tests.cpp | 4 ++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index 614d8966f2c..eda68fb07a6 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -30,18 +30,18 @@ namespace lists { /** * @brief Segmented sort of the elements within a list in each row of a list column. * - * `source_column` with depth 1 is only supported. This uses segmented radix sort. + * `source_column` with depth 1 is only supported. 
* * * @code{.pseudo} * source_column : [{4, 2, 3, 1}, {1, 2, NULL, 4}, {-10, 10, 0}] * * Ascending, Null After : [{1, 2, 3, 4}, {1, 2, 4, NULL}, {-10, 0, 10}] * Ascending, Null Before : [{1, 2, 3, 4}, {NULL, 1, 2, 4}, {-10, 0, 10}] - * Descending, Null After : [{4, 3, 2, 1}, {4, 2, 1, NULL}, {10, 0, -10}] - * Descending, Null Before : [{4, 3, 2, 1}, {NULL, 4, 2, 1}, {10, 0, -10}] + * Descending, Null After : [{4, 3, 2, 1}, {NULL, 4, 2, 1}, {10, 0, -10}] + * Descending, Null Before : [{4, 3, 2, 1}, {4, 2, 1, NULL}, {10, 0, -10}] * @endcode * - * @param source_column View into the list column of numeric types to gather from + * @param source_column View of the list column of numeric types to sort * @param column_order The desired sort order * @param null_precedence The desired order of null compared to other elements in the list * @param mr Device memory resource to allocate any returned objects diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 089471ef891..f8a4fb130be 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -151,12 +151,8 @@ struct SortPairs { auto keys = [&]() { if (child.nullable()) { rmm::device_uvector keys(child.size(), stream); - auto null_replace_T = - column_order == order::ASCENDING - ? (null_precedence == null_order::AFTER ? std::numeric_limits::max() - : std::numeric_limits::min()) - : (null_precedence == null_order::BEFORE ? std::numeric_limits::max() - : std::numeric_limits::min()); + auto null_replace_T = null_precedence == null_order::AFTER ? 
std::numeric_limits::max() + : std::numeric_limits::min(); auto device_child = column_device_view::create(child, stream); auto keys_in = cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/segmented_sort_tests.cpp index 6b9cda03ada..303aa15f963 100644 --- a/cpp/tests/lists/segmented_sort_tests.cpp +++ b/cpp/tests/lists/segmented_sort_tests.cpp @@ -95,8 +95,8 @@ TYPED_TEST(SegmentedSort, Null) // Descending // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; - LCW expected3{{{3, 2, 1, 4}, valids_a.begin()}, {5}, {10, 9, 8}, {7, 6}}; - LCW expected4{{{4, 3, 2, 1}, valids_b.begin()}, {5}, {10, 9, 8}, {7, 6}}; + LCW expected3{{{4, 3, 2, 1}, valids_b.begin()}, {5}, {10, 9, 8}, {7, 6}}; + LCW expected4{{{3, 2, 1, 4}, valids_a.begin()}, {5}, {10, 9, 8}, {7, 6}}; results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); From 4b6ae057324056d40f2366384dbdfeb6e3de308e Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 27 Jan 2021 23:58:25 +0530 Subject: [PATCH 08/32] documentation update for segmented_sort_by_key --- cpp/include/cudf/detail/sorting.hpp | 12 +++++------- cpp/include/cudf/sorting.hpp | 28 +++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index e36eb827c03..2090fd61c59 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -70,6 +70,11 @@ std::unique_ptr segmented_sorted_order( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::segmented_sort_by_key + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ std::unique_ptr
segmented_sort_by_key( table_view const& values, table_view const& keys, @@ -78,12 +83,5 @@ std::unique_ptr
segmented_sort_by_key( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr
segmented_sort( - table_view const& input, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 1f8bbeb4fa6..2e4ee9dcaf4 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -128,7 +128,7 @@ std::unique_ptr
sort( * @param values The table to reorder * @param keys The table that determines the ordering * @param column_order The desired order for each column in `keys`. Size must be - * equal to `input.num_columns()` or empty. If empty, all columns are sorted in + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in * ascending order. * @param null_precedence The desired order of a null element compared to other * elements for each column in `keys`. Size must be equal to @@ -184,8 +184,30 @@ std::unique_ptr rank( bool percentage, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr
segmented_sort( - table_view const& input, +/** + * @brief Performs a lexicographic segmented sort of the list in each row of a table of list columns + * + * `keys` with list columns of depth 1 is only supported. + * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. + * @throws cudf::logic_error if any list sizes of corresponding row in each column are not equal. + * @throws cudf::logic_error if any column of `keys` or `values` is not a list column. + * + * @param values The table to reorder + * @param keys The table that determines the ordering + * @param column_order The desired order for each column in `keys`. Size must be + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in + * ascending order. + * @param null_precedence The desired order of a null element compared to other + * elements for each column in `keys`. Size must be equal to + * `keys.num_columns()` or empty. If empty, all columns will be sorted with + * `null_order::BEFORE`. + * @param mr Device memory resource to allocate any returned objects + * @return table with list columns with elements in each list sorted. + * + */ +std::unique_ptr
segmented_sort_by_key( + table_view const& values, + table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); From a936f4e295026bf8f5c547b913c5b828810eadca Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 27 Jan 2021 23:58:49 +0530 Subject: [PATCH 09/32] replace segmented_sort by segmented_sort_by_key --- cpp/src/sort/segmented_sort.cu | 11 +++++---- cpp/tests/sort/segmented_sort_tests.cpp | 32 ++++++++++++------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index ebb8ef14c1f..732bbd598c0 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -140,14 +140,15 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, } } // namespace detail -std::unique_ptr
segmented_sort(table_view const& input, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
segmented_sort_by_key(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::segmented_sort_by_key( - input, input, column_order, null_precedence, rmm::cuda_stream_default, mr); + values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr); } } // namespace cudf diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index 04ab7bb39e4..7430bc157a1 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -58,12 +58,12 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected1{{1, 2, 3, 4, 4, 4}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2{{2, 1, 3, 1, 2, 3}, {0}, {9, 9, 10}, {6, 7}}; table_view expected_table1{{expected1, expected2}}; - auto results = cudf::segmented_sort( - input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); + auto results = cudf::segmented_sort_by_key( + input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - results = cudf::segmented_sort( - input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); // Descending @@ -71,12 +71,12 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected3{{4, 4, 4, 3, 2, 1}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4{{3, 2, 1, 3, 1, 2}, {0}, {10, 9, 9}, {7, 6}}; table_view expected_table2{{expected3, expected4}}; - results = cudf::segmented_sort( - input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); + results = cudf::segmented_sort_by_key( + input, input, {order::DESCENDING, order::DESCENDING}, 
{null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); - results = cudf::segmented_sort( - input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); } @@ -101,31 +101,31 @@ TYPED_TEST(SegmentedSort, Nulls) LCW expected1a{{{1, 2, 3, 4, 4, 4}, valids1a.begin()}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2a{{2, 1, 3, 1, 3, 2}, {0}, {{9, 10, 9}, valids2.begin()}, {6, 7}}; table_view expected_table1a{{expected1a, expected2a}}; - auto results = cudf::segmented_sort( - input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); + auto results = cudf::segmented_sort_by_key( + input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1a); // LCW order{{3, 2, 1, 0, 4, 5}, {0}, {2, 1, 0}, {0, 1}}; LCW expected1b{{{4, 1, 2, 3, 4, 4}, valids1a.rbegin()}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2b{{2, 2, 1, 3, 1, 3}, {0}, {{9, 9, 10}, valids2b.begin()}, {6, 7}}; table_view expected_table1b{{expected1b, expected2b}}; - results = cudf::segmented_sort( - input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1b); // Descending LCW expected3a{{{4, 4, 4, 3, 2, 1}, valids1a.rbegin()}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4a{{2, 3, 1, 3, 1, 2}, {0}, {{9, 10, 9}, valids2.rbegin()}, {7, 6}}; table_view expected_table2a{{expected3a, expected4a}}; - results = cudf::segmented_sort( - input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, 
null_order::AFTER}); + results = cudf::segmented_sort_by_key( + input, input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2a); LCW expected3b{{{4, 4, 3, 2, 1, 4}, valids1a.begin()}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4b{{3, 1, 3, 1, 2, 2}, {0}, {{10, 9, 9}, valids2b.begin()}, {7, 6}}; table_view expected_table2b{{expected3b, expected4b}}; - results = cudf::segmented_sort( - input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); } From d03f032e956f1b1474ca51bc43acd2bb1ebd1764 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 28 Jan 2021 00:10:36 +0530 Subject: [PATCH 10/32] conda yml include new hpp --- conda/recipes/libcudf/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 1d660e2cd74..b4e071169f1 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -128,6 +128,7 @@ test: - test -f $PREFIX/include/cudf/lists/contains.hpp - test -f $PREFIX/include/cudf/lists/gather.hpp - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp + - test -f $PREFIX/include/cudf/lists/sorting.hpp - test -f $PREFIX/include/cudf/merge.hpp - test -f $PREFIX/include/cudf/null_mask.hpp - test -f $PREFIX/include/cudf/partitioning.hpp From 23c8f264f99fbc9b192edba29dd0ca710b59538e Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 28 Jan 2021 18:38:13 +0530 Subject: [PATCH 11/32] add more checks for input list table --- cpp/src/sort/segmented_sort.cu | 90 ++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 
732bbd598c0..bf1a170d5f9 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include namespace cudf { namespace detail { @@ -59,18 +61,47 @@ rmm::device_uvector get_list_segment_indices(lists_column_view const& return std::move(segment_ids); } -std::unique_ptr segmented_sorted_order(table_view const& keys, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +// same as count_elements(list). TODO: DRY. +struct list_size_functor { + column_device_view const& d_column; + __device__ size_type operator()(size_type idx) + { + if (d_column.is_null(idx)) return size_type{0}; + auto d_offsets = + d_column.child(lists_column_view::offsets_column_index).data() + d_column.offset(); + return d_offsets[idx + 1] - d_offsets[idx]; + } +}; + +void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) { CUDF_EXPECTS(std::all_of(keys.begin(), keys.end(), [](column_view const& col) { return col.type().id() == type_id::LIST; }), "segmented_sort only supports lists columns"); - // TODO check if all list sizes are equal. OR all offsets are equal (may be wrong). + // check if all list sizes are equal. 
+ auto table_device = table_device_view::create(keys, stream); + auto counting_iter = thrust::make_counting_iterator(0); + CUDF_EXPECTS( + thrust::all_of(rmm::exec_policy(stream), + counting_iter, + counting_iter + keys.num_rows(), + [d_keys = *table_device] __device__(size_type idx) { + auto size = list_size_functor{d_keys.column(0)}(idx); + return thrust::all_of( + thrust::seq, d_keys.begin(), d_keys.end(), [&](auto const& d_column) { + return list_size_functor{d_column}(idx) == size; + }); + }), + "size of each list in a row of table should be same"); +} +std::unique_ptr segmented_sorted_order(table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ auto segment_ids = get_list_segment_indices(lists_column_view{keys.column(0)}, stream); // insert segment id before all child columns. std::vector child_key_columns(keys.num_columns() + 1); @@ -88,7 +119,7 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, if (not null_precedence.empty()) child_null_precedence.insert(child_null_precedence.begin(), null_order::AFTER); - // create table_view of child columns + // return sorted order of child columns return detail::sorted_order(child_keys, child_column_order, child_null_precedence, stream, mr); } @@ -99,17 +130,18 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(std::all_of(values.begin(), - values.end(), - [](column_view const& col) { return col.type().id() == type_id::LIST; }), - "segmented_sort only supports lists columns"); + std::vector key_value_columns; + key_value_columns.reserve(keys.num_columns() + values.num_columns()); + key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); + key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); + validate_list_columns(table_view{key_value_columns}, stream); + auto sorted_order = segmented_sorted_order(keys, column_order, null_precedence, stream); + std::vector child_columns(values.num_columns()); std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); - // return std::unique_ptr
(new table{table_view{std::vector{*sorted_order}}}); - // TODO build the list columns from returned table! and packit into table! auto child_result = detail::gather(table_view{child_columns}, sorted_order->view(), out_of_bounds_policy::DONT_CHECK, @@ -119,24 +151,22 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, ->release(); std::vector> list_columns; - std::transform( // thrust::host, - values.begin(), - values.end(), - std::make_move_iterator(child_result.begin()), - std::back_inserter(list_columns), - [&stream, &mr](auto& input_list, auto&& sorted_child) { - auto output_offset = - std::make_unique(lists_column_view(input_list).offsets(), stream, mr); - auto null_mask = cudf::detail::copy_bitmask(input_list, stream, mr); - // Assemble list column & return - return make_lists_column(input_list.size(), - std::move(output_offset), - std::move(sorted_child), - input_list.null_count(), - std::move(null_mask)); - }); + std::transform(values.begin(), + values.end(), + std::make_move_iterator(child_result.begin()), + std::back_inserter(list_columns), + [&stream, &mr](auto& input_list, auto&& sorted_child) { + auto output_offset = + std::make_unique(lists_column_view(input_list).offsets(), stream, mr); + auto null_mask = cudf::detail::copy_bitmask(input_list, stream, mr); + // Assemble list column & return + return make_lists_column(input_list.size(), + std::move(output_offset), + std::move(sorted_child), + input_list.null_count(), + std::move(null_mask)); + }); return std::make_unique
(std::move(list_columns)); - // TODO write tests and verify */ } } // namespace detail From 0825984bae0deb3739736479c4fda3f1c3e42d33 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 28 Jan 2021 18:38:31 +0530 Subject: [PATCH 12/32] add error tests for inputs --- cpp/tests/sort/segmented_sort_tests.cpp | 53 ++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index 7430bc157a1..f69ed582f5d 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -21,7 +21,6 @@ #include #include -//#include #include #include #include @@ -44,6 +43,58 @@ struct SegmentedSort : public BaseFixture { // using NumericTypesNotBool = Concat; TYPED_TEST_CASE(SegmentedSort, NumericTypes); +using SegmentedSortInt = SegmentedSort; +TEST_F(SegmentedSortInt, Errors) +{ + LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; + fixed_width_column_wrapper col2{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; + strings_column_wrapper col3({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); + LCW col4{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9, 4}, {6, 7}}; + table_view input1{{col1}}; + table_view input2{{col1, col2}}; + table_view input3{{col2, col3}}; + table_view input4{{col4}}; + table_view input5{{col1, col4}}; + // Valid + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, {}, {})); + // Non-List keys + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input1, {}, {}), + "segmented_sort only supports lists columns"); + // Non-List values + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input2, {}, {}), + "segmented_sort only supports lists columns"); + // Both + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input2, {}, {}), + "segmented_sort only supports lists columns"); + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input3, {}, {}), + "segmented_sort only 
supports lists columns"); + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input3, input3, {}, {}), + "segmented_sort only supports lists columns"); + // List sizes mismatch key + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input5, input4, {}, {}), + "size of each list in a row of table should be same"); + // List sizes mismatch value + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input5, {}, {}), + "size of each list in a row of table should be same"); + // List sizes mismatch between key-value + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input4, {}, {}), + "size of each list in a row of table should be same"); + + // Mismatch order sizes + EXPECT_THROW( + cudf::segmented_sort_by_key(input1, input1, {order::ASCENDING, order::ASCENDING}, {}), + logic_error); + // Mismatch null precedence sizes + EXPECT_THROW( + cudf::segmented_sort_by_key(input1, input1, {}, {null_order::AFTER, null_order::AFTER}), + logic_error); + // Both + EXPECT_THROW( + cudf::segmented_sort_by_key( + input1, input1, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}), + logic_error); +} + TYPED_TEST(SegmentedSort, NoNull) { using T = TypeParam; From 663662b17147db4114007a96b6b37172d5067d68 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 00:28:58 +0530 Subject: [PATCH 13/32] rename API segmented_sort to sort_lists --- cpp/include/cudf/detail/sorting.hpp | 11 ++---- cpp/include/cudf/lists/sorting.hpp | 2 +- cpp/include/cudf/sorting.hpp | 6 ++-- cpp/src/lists/segmented_sort.cu | 20 +++++------ cpp/src/sort/segmented_sort.cu | 24 ++++++------- cpp/tests/lists/segmented_sort_tests.cpp | 18 +++++----- cpp/tests/sort/segmented_sort_tests.cpp | 46 ++++++++++++------------ 7 files changed, 59 insertions(+), 68 deletions(-) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 2090fd61c59..2a739860c4d 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ 
b/cpp/include/cudf/detail/sorting.hpp @@ -63,19 +63,12 @@ std::unique_ptr
sort_by_key( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr segmented_sorted_order( - table_view const& keys, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** - * @copydoc cudf::segmented_sort_by_key + * @copydoc cudf::sort_lists * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
segmented_sort_by_key( +std::unique_ptr
sort_lists( table_view const& values, table_view const& keys, std::vector const& column_order = {}, diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index eda68fb07a6..e27f3d03d86 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -48,7 +48,7 @@ namespace lists { * @return list column with elements in each list sorted. * */ -std::unique_ptr segmented_sort( +std::unique_ptr sort_lists( lists_column_view const& source_column, order column_order, null_order null_precedence, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 2e4ee9dcaf4..9a2cbf74102 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -192,8 +192,8 @@ std::unique_ptr rank( * @throws cudf::logic_error if any list sizes of corresponding row in each column are not equal. * @throws cudf::logic_error if any column of `keys` or `values` is not a list column. * - * @param values The table to reorder - * @param keys The table that determines the ordering + * @param values The table with list columns to reorder + * @param keys The table with list columns that determines the ordering of elements in each list * @param column_order The desired order for each column in `keys`. Size must be * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in * ascending order. @@ -205,7 +205,7 @@ std::unique_ptr rank( * @return table with list columns with elements in each list sorted. * */ -std::unique_ptr
segmented_sort_by_key( +std::unique_ptr
sort_lists( table_view const& values, table_view const& keys, std::vector const& column_order = {}, diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index f8a4fb130be..a9f5da2aef7 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -203,11 +203,11 @@ struct SortPairs { } }; -std::unique_ptr segmented_sort(lists_column_view const& input, - order column_order, - null_order null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr sort_lists(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (input.size() == 0) return {}; @@ -233,13 +233,13 @@ std::unique_ptr segmented_sort(lists_column_view const& input, } } // namespace detail -std::unique_ptr segmented_sort(lists_column_view const& input, - order column_order, - null_order null_precedence, - rmm::mr::device_memory_resource* mr) +std::unique_ptr sort_lists(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_sort(input, column_order, null_precedence, rmm::cuda_stream_default, mr); + return detail::sort_lists(input, column_order, null_precedence, rmm::cuda_stream_default, mr); } } // namespace lists diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index bf1a170d5f9..a132da0f8c4 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -123,12 +123,12 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, return detail::sorted_order(child_keys, child_column_order, child_null_precedence, stream, mr); } -std::unique_ptr
segmented_sort_by_key(table_view const& values, - table_view const& keys, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
sort_lists(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector key_value_columns; key_value_columns.reserve(keys.num_columns() + values.num_columns()); @@ -170,14 +170,14 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, } } // namespace detail -std::unique_ptr
segmented_sort_by_key(table_view const& values, - table_view const& keys, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
sort_lists(table_view const& values, + table_view const& keys, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_sort_by_key( + return detail::sort_lists( values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr); } diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/segmented_sort_tests.cpp index 303aa15f963..491a58a7f4c 100644 --- a/cpp/tests/lists/segmented_sort_tests.cpp +++ b/cpp/tests/lists/segmented_sort_tests.cpp @@ -36,7 +36,7 @@ template using LCW = cudf::test::lists_column_wrapper; using cudf::lists_column_view; -using cudf::lists::segmented_sort; +using cudf::lists::sort_lists; namespace cudf { namespace test { @@ -58,19 +58,19 @@ TYPED_TEST(SegmentedSort, NoNull) // Ascending // LCW order{{2, 1, 0, 3}, {0}, {1, 2, 0}, {0, 1}}; LCW expected{{1, 2, 3, 4}, {5}, {8, 9, 10}, {6, 7}}; - auto results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + auto results = sort_lists(lists_column_view{list}, order::ASCENDING, null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); - results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + results = sort_lists(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); // Descending // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; LCW expected2{{4, 3, 2, 1}, {5}, {10, 9, 8}, {7, 6}}; - results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + results = sort_lists(lists_column_view{list}, order::DESCENDING, null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); - results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + results = sort_lists(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); } @@ -87,20 +87,20 @@ TYPED_TEST(SegmentedSort, Null) // LCW order{{2, 1, 3, 0}, {0}, {1, 2, 0}, {0, 1}}; LCW expected1{{{1, 2, 3, 4}, valids_a.begin()}, {5}, {8, 9, 10}, {6, 7}}; LCW expected2{{{4, 1, 2, 3}, valids_b.begin()}, {5}, {8, 9, 10}, {6, 7}}; - auto results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + auto results = sort_lists(lists_column_view{list}, order::ASCENDING, null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected1); - results = segmented_sort(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + results = sort_lists(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); // Descending // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; LCW expected3{{{4, 3, 2, 1}, valids_b.begin()}, {5}, {10, 9, 8}, {7, 6}}; LCW expected4{{{3, 2, 1, 4}, valids_a.begin()}, {5}, {10, 9, 8}, {7, 6}}; - results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + results = sort_lists(lists_column_view{list}, order::DESCENDING, null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); - results = segmented_sort(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + results = sort_lists(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected4); } diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index f69ed582f5d..1397f5aa230 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -56,41 +56,39 @@ TEST_F(SegmentedSortInt, Errors) table_view input4{{col4}}; table_view input5{{col1, col4}}; // Valid - CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, {}, {})); + CUDF_EXPECT_NO_THROW(cudf::sort_lists(input1, input1, {}, {})); // Non-List keys 
- CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input1, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input1, {}, {}), "segmented_sort only supports lists columns"); // Non-List values - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input2, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input2, {}, {}), "segmented_sort only supports lists columns"); // Both - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input2, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input2, {}, {}), "segmented_sort only supports lists columns"); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input2, input3, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input3, {}, {}), "segmented_sort only supports lists columns"); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input3, input3, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input3, input3, {}, {}), "segmented_sort only supports lists columns"); // List sizes mismatch key - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input5, input4, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input5, input4, {}, {}), "size of each list in a row of table should be same"); // List sizes mismatch value - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input5, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input5, {}, {}), "size of each list in a row of table should be same"); // List sizes mismatch between key-value - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(input1, input4, {}, {}), + CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input4, {}, {}), "size of each list in a row of table should be same"); // Mismatch order sizes - EXPECT_THROW( - cudf::segmented_sort_by_key(input1, input1, {order::ASCENDING, order::ASCENDING}, {}), - logic_error); + EXPECT_THROW(cudf::sort_lists(input1, input1, {order::ASCENDING, order::ASCENDING}, {}), + logic_error); // Mismatch 
null precedence sizes - EXPECT_THROW( - cudf::segmented_sort_by_key(input1, input1, {}, {null_order::AFTER, null_order::AFTER}), - logic_error); + EXPECT_THROW(cudf::sort_lists(input1, input1, {}, {null_order::AFTER, null_order::AFTER}), + logic_error); // Both EXPECT_THROW( - cudf::segmented_sort_by_key( + cudf::sort_lists( input1, input1, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}), logic_error); } @@ -109,11 +107,11 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected1{{1, 2, 3, 4, 4, 4}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2{{2, 1, 3, 1, 2, 3}, {0}, {9, 9, 10}, {6, 7}}; table_view expected_table1{{expected1, expected2}}; - auto results = cudf::segmented_sort_by_key( + auto results = cudf::sort_lists( input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); @@ -122,11 +120,11 @@ TYPED_TEST(SegmentedSort, NoNull) LCW expected3{{4, 4, 4, 3, 2, 1}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4{{3, 2, 1, 3, 1, 2}, {0}, {10, 9, 9}, {7, 6}}; table_view expected_table2{{expected3, expected4}}; - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); } @@ -152,7 +150,7 @@ TYPED_TEST(SegmentedSort, Nulls) LCW expected1a{{{1, 2, 3, 4, 4, 4}, valids1a.begin()}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2a{{2, 1, 3, 1, 3, 2}, 
{0}, {{9, 10, 9}, valids2.begin()}, {6, 7}}; table_view expected_table1a{{expected1a, expected2a}}; - auto results = cudf::segmented_sort_by_key( + auto results = cudf::sort_lists( input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1a); @@ -160,7 +158,7 @@ TYPED_TEST(SegmentedSort, Nulls) LCW expected1b{{{4, 1, 2, 3, 4, 4}, valids1a.rbegin()}, {5}, {8, 9, 9}, {6, 7}}; LCW expected2b{{2, 2, 1, 3, 1, 3}, {0}, {{9, 9, 10}, valids2b.begin()}, {6, 7}}; table_view expected_table1b{{expected1b, expected2b}}; - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1b); @@ -168,14 +166,14 @@ TYPED_TEST(SegmentedSort, Nulls) LCW expected3a{{{4, 4, 4, 3, 2, 1}, valids1a.rbegin()}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4a{{2, 3, 1, 3, 1, 2}, {0}, {{9, 10, 9}, valids2.rbegin()}, {7, 6}}; table_view expected_table2a{{expected3a, expected4a}}; - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2a); LCW expected3b{{{4, 4, 3, 2, 1, 4}, valids1a.begin()}, {5}, {9, 9, 8}, {7, 6}}; LCW expected4b{{3, 1, 3, 1, 2, 2}, {0}, {{10, 9, 9}, valids2b.begin()}, {7, 6}}; table_view expected_table2b{{expected3b, expected4b}}; - results = cudf::segmented_sort_by_key( + results = cudf::sort_lists( input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); } From e842e1dcbc192b02f50e6da033e002792be1d017 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 00:38:47 +0530 Subject: [PATCH 14/32] rename tests --- 
cpp/src/sort/segmented_sort.cu | 3 ++- cpp/tests/lists/segmented_sort_tests.cpp | 9 ++++----- cpp/tests/sort/segmented_sort_tests.cpp | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index a132da0f8c4..2161e39cfcf 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -136,7 +136,8 @@ std::unique_ptr
sort_lists(table_view const& values, key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); validate_list_columns(table_view{key_value_columns}, stream); - auto sorted_order = segmented_sorted_order(keys, column_order, null_precedence, stream); + auto sorted_order = segmented_sorted_order( + keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); std::vector child_columns(values.num_columns()); std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/segmented_sort_tests.cpp index 491a58a7f4c..8e4bc0feb64 100644 --- a/cpp/tests/lists/segmented_sort_tests.cpp +++ b/cpp/tests/lists/segmented_sort_tests.cpp @@ -42,13 +42,12 @@ namespace cudf { namespace test { template -struct SegmentedSort : public BaseFixture { +struct SegmentedSortLists : public BaseFixture { }; -// using NumericTypesNotBool = Concat; -TYPED_TEST_CASE(SegmentedSort, NumericTypes); +TYPED_TEST_CASE(SegmentedSortLists, NumericTypes); -TYPED_TEST(SegmentedSort, NoNull) +TYPED_TEST(SegmentedSortLists, NoNull) { using T = TypeParam; @@ -74,7 +73,7 @@ TYPED_TEST(SegmentedSort, NoNull) CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); } -TYPED_TEST(SegmentedSort, Null) +TYPED_TEST(SegmentedSortLists, Null) { using T = TypeParam; if (std::is_same::value) return; diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index 1397f5aa230..ce3434d3e47 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -37,14 +37,14 @@ namespace cudf { namespace test { template -struct SegmentedSort : public BaseFixture { +struct SegmentedSortLists : public BaseFixture { }; // using NumericTypesNotBool = Concat; -TYPED_TEST_CASE(SegmentedSort, NumericTypes); +TYPED_TEST_CASE(SegmentedSortLists, NumericTypes); -using SegmentedSortInt = SegmentedSort; 
-TEST_F(SegmentedSortInt, Errors) +using SegmentedSortListsInt = SegmentedSortLists; +TEST_F(SegmentedSortListsInt, Errors) { LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; fixed_width_column_wrapper col2{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; @@ -93,7 +93,7 @@ TEST_F(SegmentedSortInt, Errors) logic_error); } -TYPED_TEST(SegmentedSort, NoNull) +TYPED_TEST(SegmentedSortLists, NoNull) { using T = TypeParam; @@ -129,7 +129,7 @@ TYPED_TEST(SegmentedSort, NoNull) CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); } -TYPED_TEST(SegmentedSort, Nulls) +TYPED_TEST(SegmentedSortLists, Nulls) { using T = TypeParam; if (std::is_same::value) return; From bb0e7c13f4d5f6e7cc6d42648786ef45bc5688e4 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 01:55:23 +0530 Subject: [PATCH 15/32] update sort_lists to use segmented_sorted_order --- cpp/src/sort/segmented_sort.cu | 66 ++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 2161e39cfcf..5c8851523d6 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -41,20 +41,20 @@ namespace cudf { namespace detail { -// returns row index for each element of the row in list column. -rmm::device_uvector get_list_segment_indices(lists_column_view const& lc, - rmm::cuda_stream_view stream) +// returns segment indices for each element for all segments. +rmm::device_uvector get_segment_indices(size_type num_rows, + column_view const& offsets, + rmm::cuda_stream_view stream) { - auto sliced_child = lc.get_sliced_child(stream); - rmm::device_uvector segment_ids(sliced_child.size(), stream); + rmm::device_uvector segment_ids(num_rows, stream); - auto offsets = lc.offsets().begin() + lc.offset(); + auto offset_begin = offsets.begin(); // assumes already offset column contains offset. 
auto offsets_minus_one = thrust::make_transform_iterator( - offsets, [offsets] __device__(auto i) { return i - offsets[0] - 1; }); + offset_begin, [offset_begin] __device__(auto i) { return i - offset_begin[0] - 1; }); auto counting_iter = thrust::make_counting_iterator(0); thrust::lower_bound(rmm::exec_policy(stream), offsets_minus_one + 1, - offsets_minus_one + lc.size() + 1, + offsets_minus_one + offsets.size(), counting_iter, counting_iter + segment_ids.size(), segment_ids.begin()); @@ -97,20 +97,22 @@ void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) } std::unique_ptr segmented_sorted_order(table_view const& keys, + column_view const& segment_offsets, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto segment_ids = get_list_segment_indices(lists_column_view{keys.column(0)}, stream); - // insert segment id before all child columns. - std::vector child_key_columns(keys.num_columns() + 1); - child_key_columns[0] = - column_view(data_type(type_to_id()), segment_ids.size(), segment_ids.data()); - std::transform(keys.begin(), keys.end(), child_key_columns.begin() + 1, [stream](auto col) { - return lists_column_view(col).get_sliced_child(stream); - }); - auto child_keys = table_view(child_key_columns); + // Get segment id of each element in all segments. + auto segment_ids = get_segment_indices(keys.num_rows(), segment_offsets, stream); + + // insert segment id before all columns. 
+ std::vector keys_with_segid; + keys_with_segid.reserve(keys.num_columns() + 1); + keys_with_segid.push_back( + column_view(data_type(type_to_id()), segment_ids.size(), segment_ids.data())); + keys_with_segid.insert(keys_with_segid.end(), keys.begin(), keys.end()); + auto segid_keys = table_view(keys_with_segid); std::vector child_column_order(column_order); if (not column_order.empty()) @@ -120,7 +122,7 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, child_null_precedence.insert(child_null_precedence.begin(), null_order::AFTER); // return sorted order of child columns - return detail::sorted_order(child_keys, child_column_order, child_null_precedence, stream, mr); + return detail::sorted_order(segid_keys, child_column_order, child_null_precedence, stream, mr); } std::unique_ptr
sort_lists(table_view const& values, @@ -135,10 +137,29 @@ std::unique_ptr
sort_lists(table_view const& values, key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); validate_list_columns(table_view{key_value_columns}, stream); - - auto sorted_order = segmented_sorted_order( - keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); - + CUDF_EXPECTS(keys.num_rows() > 0, "keys table should have atleast one list column"); + + // Get sorted order of child key columns + auto child_key_columns = thrust::make_transform_iterator( + keys.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); + + auto lc = lists_column_view{keys.column(0)}; + auto offset = lc.offsets(); + auto segment_offset = column_view(offset.type(), + offset.size(), + offset.head(), + offset.null_mask(), + offset.null_count(), + offset.offset() + lc.offset()); + auto sorted_order = segmented_sorted_order( + table_view{std::vector(child_key_columns, child_key_columns + keys.num_columns())}, + segment_offset, + column_order, + null_precedence, + stream, + rmm::mr::get_current_device_resource()); + + // Gather segmented sort of child value columns std::vector child_columns(values.num_columns()); std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); @@ -151,6 +172,7 @@ std::unique_ptr
sort_lists(table_view const& values, mr) ->release(); + // Construct list columns from gathered child columns & return std::vector> list_columns; std::transform(values.begin(), values.end(), From e273dd887ad114d76d4e97243fbcb988780b5f82 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 20:29:53 +0530 Subject: [PATCH 16/32] reorder code to use segmented_sort in sort_lists --- cpp/src/sort/segmented_sort.cu | 83 +++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 5c8851523d6..09855991f2a 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -125,6 +125,30 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, return detail::sorted_order(segid_keys, child_column_order, child_null_precedence, stream, mr); } +std::unique_ptr
segmented_sort(table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto sorted_order = segmented_sorted_order(keys, + segment_offsets, + column_order, + null_precedence, + stream, + rmm::mr::get_current_device_resource()); + + // Gather segmented sort of child value columns` + return detail::gather(values, + sorted_order->view(), + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); +} + std::unique_ptr
sort_lists(table_view const& values, table_view const& keys, std::vector const& column_order, @@ -139,39 +163,32 @@ std::unique_ptr
sort_lists(table_view const& values, validate_list_columns(table_view{key_value_columns}, stream); CUDF_EXPECTS(keys.num_rows() > 0, "keys table should have atleast one list column"); - // Get sorted order of child key columns + // child columns of keys auto child_key_columns = thrust::make_transform_iterator( keys.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); - - auto lc = lists_column_view{keys.column(0)}; - auto offset = lc.offsets(); - auto segment_offset = column_view(offset.type(), - offset.size(), - offset.head(), - offset.null_mask(), - offset.null_count(), - offset.offset() + lc.offset()); - auto sorted_order = segmented_sorted_order( - table_view{std::vector(child_key_columns, child_key_columns + keys.num_columns())}, - segment_offset, - column_order, - null_precedence, - stream, - rmm::mr::get_current_device_resource()); - - // Gather segmented sort of child value columns - std::vector child_columns(values.num_columns()); - std::transform(values.begin(), values.end(), child_columns.begin(), [stream](auto col) { - return lists_column_view(col).get_sliced_child(stream); - }); - auto child_result = detail::gather(table_view{child_columns}, - sorted_order->view(), - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release(); - + auto child_keys = + table_view{std::vector(child_key_columns, child_key_columns + keys.num_columns())}; + + // segment offsets from first list column + auto lc = lists_column_view{keys.column(0)}; + auto offset = lc.offsets(); + auto segment_offsets = column_view(offset.type(), + offset.size(), + offset.head(), + offset.null_mask(), + offset.null_count(), + offset.offset() + lc.offset()); + // child columns of values + auto child_value_columns = thrust::make_transform_iterator( + values.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); + auto child_values = table_view{ + 
std::vector(child_value_columns, child_value_columns + values.num_columns())}; + + // Get segment sorted child columns of list columns + auto child_result = + segmented_sort( + child_values, child_keys, segment_offsets, column_order, null_precedence, stream, mr) + ->release(); // Construct list columns from gathered child columns & return std::vector> list_columns; std::transform(values.begin(), @@ -187,7 +204,9 @@ std::unique_ptr
sort_lists(table_view const& values, std::move(output_offset), std::move(sorted_child), input_list.null_count(), - std::move(null_mask)); + std::move(null_mask), + stream, + mr); }); return std::make_unique
(std::move(list_columns)); } From 5e63c7af32be44be28e7ccdf74153557c97a0007 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 20:30:27 +0530 Subject: [PATCH 17/32] add sort_lists key,value tests --- cpp/tests/sort/segmented_sort_tests.cpp | 39 ++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index ce3434d3e47..acb308d4e5f 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -44,7 +44,7 @@ struct SegmentedSortLists : public BaseFixture { TYPED_TEST_CASE(SegmentedSortLists, NumericTypes); using SegmentedSortListsInt = SegmentedSortLists; -TEST_F(SegmentedSortListsInt, Errors) +TEST_F(SegmentedSortListsInt, ErrorsTableSizes) { LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; fixed_width_column_wrapper col2{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; @@ -79,6 +79,12 @@ TEST_F(SegmentedSortListsInt, Errors) // List sizes mismatch between key-value CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input4, {}, {}), "size of each list in a row of table should be same"); +} + +TEST_F(SegmentedSortListsInt, ErrorsMismatchArgSizes) +{ + LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; + table_view input1{{col1}}; // Mismatch order sizes EXPECT_THROW(cudf::sort_lists(input1, input1, {order::ASCENDING, order::ASCENDING}, {}), @@ -178,5 +184,36 @@ TYPED_TEST(SegmentedSortLists, Nulls) CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); } +TEST_F(SegmentedSortListsInt, KeyValues) +{ + using T = int; + using LCWstr = cudf::test::lists_column_wrapper; + + // List + LCW a{{21, 22, 23, 22}, {22, 21, 23, 22}}; + LCW b{{13, 14, 12, 11}, {14, 13, 12, 11}}; + LCWstr c{{"a", "b", "c", "d"}, {"a", "b", "c", "d"}}; + + // Ascending {a} + // LCW order{{0, 1, 3, 2}, {1, 0, 3, 2}}; + LCW sorted_a1{{21, 22, 22, 23}, {21, 22, 22, 23}}; + LCW sorted_b1{{13, 14, 11, 12}, {13, 14, 
11, 12}}; + LCWstr sorted_c1{{"a", "b", "d", "c"}, {"b", "a", "d", "c"}}; + auto results = cudf::sort_lists(table_view{{a, b, c}}, table_view{{a}}, {}, {}); + table_view expected_table1{{sorted_a1, sorted_b1, sorted_c1}}; + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); + + // Ascending {a,b} + // LCW order{{0, 3, 1, 2}, {1, 3, 0, 2}}; + LCW sorted_a2{{21, 22, 22, 23}, {21, 22, 22, 23}}; + LCW sorted_b2{{13, 11, 14, 12}, {13, 11, 14, 12}}; + LCWstr sorted_c2{{"a", "d", "b", "c"}, {"b", "d", "a", "c"}}; + table_view expected_table2{{sorted_a2, sorted_b2, sorted_c2}}; + table_view keys{{a, b}}; + table_view values{{a, b, c}}; + results = cudf::sort_lists(values, keys, {}, {}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); +} + } // namespace test } // namespace cudf From d249c2582d5692520f92231690061a5eaecf05dc Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 21:02:44 +0530 Subject: [PATCH 18/32] rename sortlists tests --- cpp/src/sort/segmented_sort.cu | 4 +++- cpp/tests/sort/segmented_sort_tests.cpp | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 09855991f2a..a7115a33454 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -75,6 +75,7 @@ struct list_size_functor { void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) { + // check if all are list columns CUDF_EXPECTS(std::all_of(keys.begin(), keys.end(), [](column_view const& col) { return col.type().id() == type_id::LIST; }), @@ -156,12 +157,12 @@ std::unique_ptr
sort_lists(table_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(keys.num_columns() > 0, "keys table should have atleast one list column"); std::vector key_value_columns; key_value_columns.reserve(keys.num_columns() + values.num_columns()); key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); validate_list_columns(table_view{key_value_columns}, stream); - CUDF_EXPECTS(keys.num_rows() > 0, "keys table should have atleast one list column"); // child columns of keys auto child_key_columns = thrust::make_transform_iterator( @@ -189,6 +190,7 @@ std::unique_ptr
sort_lists(table_view const& values, segmented_sort( child_values, child_keys, segment_offsets, column_order, null_precedence, stream, mr) ->release(); + // Construct list columns from gathered child columns & return std::vector> list_columns; std::transform(values.begin(), diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index acb308d4e5f..c44a74e1cc2 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -37,14 +37,14 @@ namespace cudf { namespace test { template -struct SegmentedSortLists : public BaseFixture { +struct SortLists : public BaseFixture { }; // using NumericTypesNotBool = Concat; -TYPED_TEST_CASE(SegmentedSortLists, NumericTypes); +TYPED_TEST_CASE(SortLists, NumericTypes); -using SegmentedSortListsInt = SegmentedSortLists; -TEST_F(SegmentedSortListsInt, ErrorsTableSizes) +using SortListsInt = SortLists; +TEST_F(SortListsInt, ErrorsTableSizes) { LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; fixed_width_column_wrapper col2{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; @@ -81,7 +81,7 @@ TEST_F(SegmentedSortListsInt, ErrorsTableSizes) "size of each list in a row of table should be same"); } -TEST_F(SegmentedSortListsInt, ErrorsMismatchArgSizes) +TEST_F(SortListsInt, ErrorsMismatchArgSizes) { LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; table_view input1{{col1}}; @@ -99,7 +99,7 @@ TEST_F(SegmentedSortListsInt, ErrorsMismatchArgSizes) logic_error); } -TYPED_TEST(SegmentedSortLists, NoNull) +TYPED_TEST(SortLists, NoNull) { using T = TypeParam; @@ -135,7 +135,7 @@ TYPED_TEST(SegmentedSortLists, NoNull) CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); } -TYPED_TEST(SegmentedSortLists, Nulls) +TYPED_TEST(SortLists, Nulls) { using T = TypeParam; if (std::is_same::value) return; @@ -184,7 +184,7 @@ TYPED_TEST(SegmentedSortLists, Nulls) CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); } -TEST_F(SegmentedSortListsInt, 
KeyValues) +TEST_F(SortListsInt, KeyValues) { using T = int; using LCWstr = cudf::test::lists_column_wrapper; From 837655ec3f9c888195deb11ade5bf1f5650da348 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 29 Jan 2021 21:49:14 +0530 Subject: [PATCH 19/32] rename tests sort_lists --- cpp/src/sort/segmented_sort.cu | 4 ++-- cpp/tests/CMakeLists.txt | 4 ++-- .../{segmented_sort_tests.cpp => sort_lists_tests.cpp} | 8 ++++---- .../{segmented_sort_tests.cpp => sort_lists_tests.cpp} | 0 4 files changed, 8 insertions(+), 8 deletions(-) rename cpp/tests/lists/{segmented_sort_tests.cpp => sort_lists_tests.cpp} (95%) rename cpp/tests/sort/{segmented_sort_tests.cpp => sort_lists_tests.cpp} (100%) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index a7115a33454..d46994e2ecd 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -25,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -33,6 +32,7 @@ #include #include +//#include #include #include #include diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9d049d6fce5..375dbc29d58 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -362,7 +362,7 @@ ConfigureTest(JSON_TEST "${JSON_TEST_SRC}") # - sort tests ------------------------------------------------------------------------------------ set(SORT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/sort/segmented_sort_tests.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_lists_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/rank_test.cpp") @@ -657,7 +657,7 @@ set(LISTS_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/lists/contains_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/lists/count_elements_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/lists/extract_tests.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/lists/segmented_sort_tests.cpp") + 
"${CMAKE_CURRENT_SOURCE_DIR}/lists/sort_lists_tests.cpp") ConfigureTest(LISTS_TEST "${LISTS_TEST_SRC}") diff --git a/cpp/tests/lists/segmented_sort_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp similarity index 95% rename from cpp/tests/lists/segmented_sort_tests.cpp rename to cpp/tests/lists/sort_lists_tests.cpp index 8e4bc0feb64..dbbec65667a 100644 --- a/cpp/tests/lists/segmented_sort_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -42,12 +42,12 @@ namespace cudf { namespace test { template -struct SegmentedSortLists : public BaseFixture { +struct SortLists : public BaseFixture { }; -TYPED_TEST_CASE(SegmentedSortLists, NumericTypes); +TYPED_TEST_CASE(SortLists, NumericTypes); -TYPED_TEST(SegmentedSortLists, NoNull) +TYPED_TEST(SortLists, NoNull) { using T = TypeParam; @@ -73,7 +73,7 @@ TYPED_TEST(SegmentedSortLists, NoNull) CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); } -TYPED_TEST(SegmentedSortLists, Null) +TYPED_TEST(SortLists, Null) { using T = TypeParam; if (std::is_same::value) return; diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/sort_lists_tests.cpp similarity index 100% rename from cpp/tests/sort/segmented_sort_tests.cpp rename to cpp/tests/sort/sort_lists_tests.cpp From 2303fea7c8a39800547ea02c6c9f673349accc54 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Sat, 30 Jan 2021 16:15:19 +0530 Subject: [PATCH 20/32] Apply suggestions from code review (codereport) Co-authored-by: Conor Hoekstra <36027403+codereport@users.noreply.github.com> --- cpp/src/lists/segmented_sort.cu | 15 ++++++++------- cpp/src/sort/segmented_sort.cu | 8 ++++---- cpp/tests/lists/sort_lists_tests.cpp | 3 +-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index a9f5da2aef7..c5c03b20693 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -27,13 +27,15 @@ #include #include 
-#include -#include #include #include #include #include +#include + +#include + namespace cudf { namespace lists { namespace detail { @@ -151,8 +153,8 @@ struct SortPairs { auto keys = [&]() { if (child.nullable()) { rmm::device_uvector keys(child.size(), stream); - auto null_replace_T = null_precedence == null_order::AFTER ? std::numeric_limits::max() - : std::numeric_limits::min(); + auto const null_replace_T = null_precedence == null_order::AFTER ? std::numeric_limits::max() + : std::numeric_limits::min(); auto device_child = column_device_view::create(child, stream); auto keys_in = cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); @@ -190,8 +192,7 @@ struct SortPairs { offsets.begin(), offsets.begin() + 1, stream); - std::vector> output_cols; - output_cols.push_back(std::move(output)); + std::vector> output_cols{std::move(output)}; // rearrange the null_mask. cudf::detail::gather_bitmask(cudf::table_view{{child}}, mutable_indices_view.begin(), @@ -209,7 +210,7 @@ std::unique_ptr sort_lists(lists_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.size() == 0) return {}; + if (input.is_empty()) return {}; auto output_child = type_dispatcher(input.child().type(), SortPairs{}, diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index d46994e2ecd..fae432fa17f 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -24,20 +24,20 @@ #include #include -#include -#include -#include #include #include #include #include -//#include #include #include #include #include +#include +#include +#include + namespace cudf { namespace detail { diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index dbbec65667a..d75711bd12c 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -27,12 +27,11 @@ #include #include #include +#include #include #include -#include - template 
using LCW = cudf::test::lists_column_wrapper; using cudf::lists_column_view; From 2a8ee210160dde8137c2d7bef3558886aa375538 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 1 Feb 2021 13:08:37 +0530 Subject: [PATCH 21/32] style fix clang format --- cpp/src/lists/segmented_sort.cu | 5 +++-- cpp/tests/lists/sort_lists_tests.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index c5c03b20693..965d5cc34b5 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -153,8 +153,9 @@ struct SortPairs { auto keys = [&]() { if (child.nullable()) { rmm::device_uvector keys(child.size(), stream); - auto const null_replace_T = null_precedence == null_order::AFTER ? std::numeric_limits::max() - : std::numeric_limits::min(); + auto const null_replace_T = null_precedence == null_order::AFTER + ? std::numeric_limits::max() + : std::numeric_limits::min(); auto device_child = column_device_view::create(child, stream); auto keys_in = cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index d75711bd12c..8e5dc7cb0f2 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -24,10 +24,10 @@ #include #include #include +#include #include #include #include -#include #include #include From 71e048ad697e2f80db469e11521907d63e9f4e81 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 1 Feb 2021 16:41:39 +0530 Subject: [PATCH 22/32] fix vec constructor --- cpp/src/lists/segmented_sort.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 965d5cc34b5..be70660a1ad 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -193,7 +193,8 @@ struct SortPairs { offsets.begin(), offsets.begin() + 1, 
stream); - std::vector> output_cols{std::move(output)}; + std::vector> output_cols; + output_cols.push_back(std::move(output)); // rearrange the null_mask. cudf::detail::gather_bitmask(cudf::table_view{{child}}, mutable_indices_view.begin(), From 0a7d5655feecd650be07a410a3df0fa4fe3b6657 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 1 Feb 2021 16:42:45 +0530 Subject: [PATCH 23/32] add segmented_sort, detail::segmented_sorted_order --- cpp/include/cudf/detail/sorting.hpp | 43 +++++++++++++++++++++++++++++ cpp/include/cudf/sorting.hpp | 30 ++++++++++++++++++++ cpp/src/sort/segmented_sort.cu | 13 +++++++++ 3 files changed, 86 insertions(+) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 2a739860c4d..1118451a01d 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -63,6 +63,49 @@ std::unique_ptr
sort_by_key( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::segmented_sort + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr
segmented_sort( + table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns sorted order after sorting each segment in the table. + * + * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. + * + * @param keys The table that determines the ordering of elements in each segment + * @param segment_offsets The column of `size_type` type containing start offset index for each + * contiguous segment. + * @param column_order The desired order for each column in `keys`. Size must be + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in + * ascending order. + * @param null_precedence The desired order of a null element compared to other + * elements for each column in `keys`. Size must be equal to + * `keys.num_columns()` or empty. If empty, all columns will be sorted with + * `null_order::BEFORE`. + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource to allocate any returned objects + * @return sorted order of the segment sorted table . 
+ * + */ +std::unique_ptr segmented_sorted_order( + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::sort_lists * diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 9a2cbf74102..a0f9dd0a223 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -184,6 +184,36 @@ std::unique_ptr rank( bool percentage, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Performs a lexicographic segmented sort of a table + * + * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. + * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. + * + * @param values The table to reorder + * @param keys The table that determines the ordering of elements in each segment + * @param segment_offsets The column of `size_type` type containing start offset index for each + * contiguous segment. + * @param column_order The desired order for each column in `keys`. Size must be + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in + * ascending order. + * @param null_precedence The desired order of a null element compared to other + * elements for each column in `keys`. Size must be equal to + * `keys.num_columns()` or empty. If empty, all columns will be sorted with + * `null_order::BEFORE`. + * @param mr Device memory resource to allocate any returned objects + * @return table with elements in each segment sorted. + * + */ +std::unique_ptr
segmented_sort( + table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a lexicographic segmented sort of the list in each row of a table of list columns * diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index fae432fa17f..b6f8b2d14c4 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -104,6 +104,8 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(segment_offsets.type() == data_type(type_to_id()), + "segment offsets should be size_type"); // Get segment id of each element in all segments. auto segment_ids = get_segment_indices(keys.num_rows(), segment_offsets, stream); @@ -214,6 +216,17 @@ std::unique_ptr
sort_lists(table_view const& values, } } // namespace detail +std::unique_ptr
segmented_sort(table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_sort( + values, keys, segment_offsets, column_order, null_precedence, rmm::cuda_stream_default, mr); +} std::unique_ptr
sort_lists(table_view const& values, table_view const& keys, std::vector const& column_order, From d3cadc3f770beec7508b4468389a15068113a5d6 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 1 Feb 2021 17:00:18 +0530 Subject: [PATCH 24/32] add segmented sort tests --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/sort/segmented_sort_tests.cpp | 157 ++++++++++++++++++++++++ cpp/tests/sort/sort_lists_tests.cpp | 2 - 3 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 cpp/tests/sort/segmented_sort_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 375dbc29d58..508e9803877 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -362,6 +362,7 @@ ConfigureTest(JSON_TEST "${JSON_TEST_SRC}") # - sort tests ------------------------------------------------------------------------------------ set(SORT_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/sort/segmented_sort_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_lists_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/rank_test.cpp") diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp new file mode 100644 index 00000000000..2b90eb21ea5 --- /dev/null +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +template +using column_wrapper = cudf::test::fixed_width_column_wrapper; +using LCWstr = cudf::test::lists_column_wrapper; + +namespace cudf { +namespace test { + +template +struct SegmentedSort : public BaseFixture { +}; + +TYPED_TEST_CASE(SegmentedSort, NumericTypes); +using SegmentedSortInt = SegmentedSort; + +/* +normal case +{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6} +{0, 2, 5, 8 11} + without null + with null +empty case + key{}, + value{}, + segment_offset{} +single case + keys{1}, value{1} + segmented_offset{0}, {0, 1} +corner case + sliced table, + sliced segment_offsets + non-zero start of segment_offsets without offset + non-zero start of segment_offsets with offset +mismatch sizes + keys, values num_rows + order, null_order + segmented_offsets beyond num_rows +//*/ + +TYPED_TEST(SegmentedSort, NoNull) +{ + using T = TypeParam; + + // segments {0 1 2} {3 4} {5} {6 7 8 9 10}{11 12}{13}{14 15} + column_wrapper col1{{10, 36, 14, 32, 49, 23, 10, 34, 12, 45, 12, 37, 43, 26, 21, 16}}; + column_wrapper col2{{10, 63, 41, 23, 94, 32, 10, 43, 21, 54, 22, 73, 34, 62, 12, 61}}; + // segment sorted order {0 2 1} {3 4} {5} {6 8 10 7 9}{11 12}{13}{15 16} + column_wrapper segments{0, 3, 5, 5, 5, 6, 11, 13, 14, 16}; + table_view input1{{col1}}; + table_view input2{{col1, col2}}; + + // Ascending + column_wrapper col1_asc{{10, 14, 36, 32, 49, 23, 10, 12, 12, 34, 45, 37, 43, 26, 16, 21}}; + + auto results = cudf::segmented_sort(input1, input1, segments, {order::ASCENDING}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_asc}}); + + column_wrapper col1_des{{36, 14, 10, 49, 32, 23, 45, 34, 12, 12, 10, 43, 37, 26, 21, 16}}; + results = cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_des}}); + + column_wrapper col1_12_asc{{10, 14, 36, 32, 49, 23, 10, 12, 12, 
34, 45, 37, 43, 26, 16, 21}}; + column_wrapper col2_12_asc{{10, 41, 63, 23, 94, 32, 10, 21, 22, 43, 54, 73, 34, 62, 61, 12}}; + column_wrapper col2_12_des{{10, 41, 63, 23, 94, 32, 10, 22, 21, 43, 54, 73, 34, 62, 61, 12}}; + + table_view expected12_aa{{col1_12_asc, col2_12_asc}}; + results = cudf::segmented_sort(input2, input2, segments, {}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_aa); + + table_view expected12_ad{{col1_12_asc, col2_12_des}}; + results = cudf::segmented_sort(input2, input2, segments, {order::ASCENDING, order::DESCENDING}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_ad); +} + +TYPED_TEST(SegmentedSort, Null) +{ + using T = TypeParam; + if (std::is_same::value) return; + + // segments {0 1 2} {3 4} {5} {6 7 8 9 10}{11 12}{13}{14 15} + column_wrapper col1{{1, 3, 2, 4, 5, 23, 6, 8, 7, 9, 7, 37, 43, 26, 21, 16}, + {1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}}; + column_wrapper col2{{0, 0, 0, 1, 1, 4, 5, 5, 21, 5, 22, 6, 6, 7, 8, 8}, + {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}}; + column_wrapper segments{0, 3, 5, 5, 5, 6, 11, 13, 14, 16}; + table_view input1{{col1}}; + table_view input2{{col1, col2}}; + + // Ascending + column_wrapper col1_aa{{1, 3, 2, 4, 5, 23, 6, 7, 7, 8, 9, 37, 43, 26, 16, 21}, + {1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}}; + column_wrapper col1_ab{{2, 1, 3, 4, 5, 23, 9, 6, 7, 7, 8, 37, 43, 26, 16, 21}, + {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; + + auto results = cudf::segmented_sort(input1, input1, segments, {}, {null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_aa}}); + results = cudf::segmented_sort(input1, input1, segments, {}, {null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_ab}}); + + // Descending + column_wrapper col1_da{{2, 3, 1, 5, 4, 23, 9, 8, 7, 7, 6, 43, 37, 26, 21, 16}, + {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; + column_wrapper col1_db{{3, 1, 2, 5, 4, 23, 8, 7, 7, 6, 9, 
43, 37, 26, 21, 16}, + {1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}}; + results = + cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}, {null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_da}}); + results = + cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}, {null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_db}}); + + // second row null order. + column_wrapper col2_12_aa{{0, 0, 0, 1, 1, 4, 5, 22, 21, 5, 5, 6, 6, 7, 8, 8}, + {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}}; + column_wrapper col2_12_ab{{0, 0, 0, 1, 1, 4, 5, 5, 21, 22, 5, 6, 6, 7, 8, 8}, + {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}}; + table_view expected12_aa{{col1_aa, col2_12_aa}}; + table_view expected12_ab{{col1_ab, col2_12_ab}}; + results = + cudf::segmented_sort(input2, input2, segments, {}, {null_order::AFTER, null_order::AFTER}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_aa); + results = + cudf::segmented_sort(input2, input2, segments, {}, {null_order::BEFORE, null_order::BEFORE}); + CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_ab); +} + +} // namespace test +} // namespace cudf diff --git a/cpp/tests/sort/sort_lists_tests.cpp b/cpp/tests/sort/sort_lists_tests.cpp index c44a74e1cc2..5697fb12003 100644 --- a/cpp/tests/sort/sort_lists_tests.cpp +++ b/cpp/tests/sort/sort_lists_tests.cpp @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include #include From 8f2601173acfc3348a5138b5367d9d6df1e399c5 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 2 Feb 2021 01:27:28 +0530 Subject: [PATCH 25/32] segmented_sort unit tests --- cpp/src/sort/segmented_sort.cu | 7 +- cpp/tests/sort/segmented_sort_tests.cpp | 133 ++++++++++++++++++++++-- cpp/tests/sort/sort_lists_tests.cpp | 3 - 3 files changed, 128 insertions(+), 15 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 
b6f8b2d14c4..47ee5290868 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -42,6 +42,7 @@ namespace cudf { namespace detail { // returns segment indices for each element for all segments. +// first segment begin index = 0, last segment end index = num_rows. rmm::device_uvector get_segment_indices(size_type num_rows, column_view const& offsets, rmm::cuda_stream_view stream) @@ -50,10 +51,10 @@ rmm::device_uvector get_segment_indices(size_type num_rows, auto offset_begin = offsets.begin(); // assumes already offset column contains offset. auto offsets_minus_one = thrust::make_transform_iterator( - offset_begin, [offset_begin] __device__(auto i) { return i - offset_begin[0] - 1; }); + offset_begin, [offset_begin] __device__(auto i) { return i - 1; }); auto counting_iter = thrust::make_counting_iterator(0); thrust::lower_bound(rmm::exec_policy(stream), - offsets_minus_one + 1, + offsets_minus_one, offsets_minus_one + offsets.size(), counting_iter, counting_iter + segment_ids.size(), @@ -136,6 +137,8 @@ std::unique_ptr
segmented_sort(table_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(values.num_rows() == keys.num_rows(), + "Mismatch in number of rows for values and keys"); auto sorted_order = segmented_sorted_order(keys, segment_offsets, column_order, diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index 2b90eb21ea5..f25bc84ce9d 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -20,16 +20,15 @@ #include #include -#include -#include +#include +#include #include -#include #include #include template -using column_wrapper = cudf::test::fixed_width_column_wrapper; +using column_wrapper = cudf::test::fixed_width_column_wrapper; using LCWstr = cudf::test::lists_column_wrapper; namespace cudf { @@ -42,12 +41,7 @@ struct SegmentedSort : public BaseFixture { TYPED_TEST_CASE(SegmentedSort, NumericTypes); using SegmentedSortInt = SegmentedSort; -/* -normal case -{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6} -{0, 2, 5, 8 11} - without null - with null +/* Summary of test cases. 
empty case key{}, value{}, @@ -55,6 +49,11 @@ empty case single case keys{1}, value{1} segmented_offset{0}, {0, 1} +normal case +{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6} +{0, 2, 5, 8 11} + without null + with null corner case sliced table, sliced segment_offsets @@ -65,6 +64,46 @@ mismatch sizes order, null_order segmented_offsets beyond num_rows //*/ +TEST_F(SegmentedSortInt, Empty) +{ + using T = int; + column_wrapper col_empty{}; + // clang-format off + column_wrapper col1{{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6}}; + column_wrapper segments{{0, 2, 5, 8, 11}}; + // clang-format on + table_view table_empty{{col_empty}}; + table_view table_valid{{col1}}; + + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_valid, table_valid, segments)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_valid, table_valid, col_empty)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_empty, table_empty, segments)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_empty, table_empty, col_empty)); + + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_empty, table_valid, segments), + "Mismatch in number of rows for values and keys"); + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_empty, table_valid, col_empty), + "Mismatch in number of rows for values and keys"); + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_valid, table_empty, segments), + "Mismatch in number of rows for values and keys"); + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_valid, table_empty, col_empty), + "Mismatch in number of rows for values and keys"); +} + +TEST_F(SegmentedSortInt, Single) +{ + using T = int; + column_wrapper col1{{1}}; + column_wrapper col3{{8, 9, 2}}; + column_wrapper segments1{{0}}; + column_wrapper segments2{{0, 3}}; + table_view table_1elem{{col1}}; + table_view table_1segm{{col3}}; + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1elem, table_1elem, segments2)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1elem, table_1elem, segments1)); + 
CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1segm, table_1segm, segments2)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1segm, table_1segm, segments1)); +} TYPED_TEST(SegmentedSort, NoNull) { @@ -153,5 +192,79 @@ TYPED_TEST(SegmentedSort, Null) CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_ab); } +TEST_F(SegmentedSortInt, NonZeroSegmentsStart) +{ + using T = int; + // clang-format off + column_wrapper col1{{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6}}; + column_wrapper segments1{{0, 2, 5, 8, 11}}; + column_wrapper segments2{{ 2, 5, 8, 11}}; + column_wrapper segments3{{ 6, 8, 11}}; + column_wrapper expected1{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}}; + column_wrapper expected2{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}}; + column_wrapper expected3{{2, 4, 5, 3, 0, 1, 7, 6, 9, 10, 8}}; + // clang-format on + table_view input{{col1}}; + auto results = cudf::detail::segmented_sorted_order(input, segments1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected1); + results = cudf::detail::segmented_sorted_order(input, segments2); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); + results = cudf::detail::segmented_sorted_order(input, segments3); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); +} + +TEST_F(SegmentedSortInt, Sliced) +{ + using T = int; + // clang-format off + column_wrapper col1{{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6}}; + // sliced 2, 2, 4, 1, 7, 5, 6 + column_wrapper segments1{{0, 2, 5}}; + column_wrapper segments2{{-4, 0, 2, 5}}; + column_wrapper segments3{{ 7}}; + column_wrapper expected1{{0, 1, 3, 2, 4, 5, 6}}; + column_wrapper expected2{{0, 1, 3, 2, 4, 5, 6}}; + column_wrapper expected3{{3, 0, 1, 2, 5, 6, 4}}; + // clang-format on + auto slice = cudf::slice(col1, {4, 11})[0]; // 7 elements + table_view input{{slice}}; + auto seg_slice = cudf::slice(segments2, {2, 4})[0]; // 2 elements + + // sliced input + auto results = cudf::detail::segmented_sorted_order(input, segments1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), 
expected1); + // sliced input and sliced segment + results = cudf::detail::segmented_sorted_order(input, seg_slice); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); + // sliced input, segment end. + results = cudf::detail::segmented_sorted_order(input, segments3); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); +} + +TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes) +{ + using T = int; + column_wrapper col1{{1, 2, 3, 4}}; + column_wrapper col2{{5, 6, 7, 8, 9}}; + table_view input1{{col1}}; + + // Mismatch order sizes + EXPECT_THROW(cudf::segmented_sort(input1, input1, col2, {order::ASCENDING, order::ASCENDING}, {}), + logic_error); + // Mismatch null precedence sizes + EXPECT_THROW( + cudf::segmented_sort(input1, input1, col2, {}, {null_order::AFTER, null_order::AFTER}), + logic_error); + // Both + EXPECT_THROW(cudf::segmented_sort(input1, + input1, + col2, + {order::ASCENDING, order::ASCENDING}, + {null_order::AFTER, null_order::AFTER}), + logic_error); + // segmented_offsets beyond num_rows - undefined behaviour, no throw. 
+ CUDF_EXPECT_NO_THROW(cudf::segmented_sort(input1, input1, col2)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/sort/sort_lists_tests.cpp b/cpp/tests/sort/sort_lists_tests.cpp index 5697fb12003..6280d03884c 100644 --- a/cpp/tests/sort/sort_lists_tests.cpp +++ b/cpp/tests/sort/sort_lists_tests.cpp @@ -20,10 +20,7 @@ #include #include -#include -#include #include -#include #include #include From 6cb960b505e5235e08c035c9d7f23be55ce7e4dc Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 2 Feb 2021 03:25:07 +0530 Subject: [PATCH 26/32] move list_size function to list_device_view.cuh --- cpp/include/cudf/lists/list_device_view.cuh | 15 +++++++++++++++ cpp/src/lists/count_elements.cu | 9 ++------- cpp/src/sort/segmented_sort.cu | 13 +------------ 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 824b10ced83..adb2e6d8189 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -190,4 +190,19 @@ class list_device_view { }; }; +/** + * @brief returns size of the list by row index + * + */ +struct list_size_functor { + column_device_view const d_column; + CUDA_DEVICE_CALLABLE size_type operator()(size_type idx) + { + if (d_column.is_null(idx)) return size_type{0}; + auto d_offsets = + d_column.child(lists_column_view::offsets_column_index).data() + d_column.offset(); + return d_offsets[idx + 1] - d_offsets[idx]; + } +}; + } // namespace cudf diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index 78549152770..ba366b3a020 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -60,13 +61,7 @@ std::unique_ptr count_elements(lists_column_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), 
output->mutable_view().begin(), - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return size_type{0}; - auto d_offsets = - d_column.child(lists_column_view::offsets_column_index).data() + - d_column.offset(); - return d_offsets[idx + 1] - d_offsets[idx]; - }); + list_size_functor{d_column}); output->set_null_count(input.null_count()); // reset null count return output; diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 47ee5290868..5169348d800 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -62,18 +63,6 @@ rmm::device_uvector get_segment_indices(size_type num_rows, return std::move(segment_ids); } -// same as count_elements(list). TODO: DRY. -struct list_size_functor { - column_device_view const& d_column; - __device__ size_type operator()(size_type idx) - { - if (d_column.is_null(idx)) return size_type{0}; - auto d_offsets = - d_column.child(lists_column_view::offsets_column_index).data() + d_column.offset(); - return d_offsets[idx + 1] - d_offsets[idx]; - } -}; - void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) { // check if all are list columns From f6c058a71bca859ac7c68b31b37764519a4d6e12 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 2 Feb 2021 04:34:07 +0530 Subject: [PATCH 27/32] Apply suggestions from code review (jake) --- cpp/include/cudf/detail/sorting.hpp | 4 +- cpp/include/cudf/lists/list_device_view.cuh | 8 +++ cpp/include/cudf/sorting.hpp | 6 +- cpp/src/lists/segmented_sort.cu | 3 +- cpp/src/sort/segmented_sort.cu | 40 ++++++------ cpp/tests/sort/segmented_sort_tests.cpp | 70 +++++++++++---------- 6 files changed, 69 insertions(+), 62 deletions(-) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 1118451a01d..da185b296b2 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ 
b/cpp/include/cudf/detail/sorting.hpp @@ -64,11 +64,11 @@ std::unique_ptr
sort_by_key( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::segmented_sort + * @copydoc cudf::segmented_sort_by_key * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
segmented_sort( +std::unique_ptr
segmented_sort_by_key( table_view const& values, table_view const& keys, column_view const& segment_offsets, diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index adb2e6d8189..b5752d7b401 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -196,6 +196,14 @@ class list_device_view { */ struct list_size_functor { column_device_view const d_column; + CUDA_HOST_DEVICE_CALLABLE list_size_functor(column_device_view const& d_col) : d_column(d_col) + { +#if defined(__CUDA_ARCH__) + release_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported"); +#else + CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported"); +#endif + } CUDA_DEVICE_CALLABLE size_type operator()(size_type idx) { if (d_column.is_null(idx)) return size_type{0}; diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index a0f9dd0a223..a9627363afb 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -206,7 +206,7 @@ std::unique_ptr rank( * @return table with elements in each segment sorted. * */ -std::unique_ptr
segmented_sort( +std::unique_ptr
segmented_sort_by_key( table_view const& values, table_view const& keys, column_view const& segment_offsets, @@ -215,9 +215,9 @@ std::unique_ptr
segmented_sort( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Performs a lexicographic segmented sort of the list in each row of a table of list columns + * @brief Performs a lexicographic sort of lists in each row of a table. * - * `keys` with list columns of depth 1 is only supported. + * `keys` and `values` with list columns of depth 1 is only supported. * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. * @throws cudf::logic_error if any list sizes of corresponding row in each column are not equal. * @throws cudf::logic_error if any column of `keys` or `values` is not a list column. diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index be70660a1ad..e62c7623365 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -212,7 +213,7 @@ std::unique_ptr sort_lists(lists_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty()) return {}; + if (input.is_empty()) return empty_like(input.parent()); auto output_child = type_dispatcher(input.child().type(), SortPairs{}, diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 5169348d800..2095a9cd3aa 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -69,7 +70,7 @@ void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) CUDF_EXPECTS(std::all_of(keys.begin(), keys.end(), [](column_view const& col) { return col.type().id() == type_id::LIST; }), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); // check if all list sizes are equal. 
auto table_device = table_device_view::create(keys, stream); auto counting_iter = thrust::make_counting_iterator(0); @@ -118,13 +119,13 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, return detail::sorted_order(segid_keys, child_column_order, child_null_precedence, stream, mr); } -std::unique_ptr
segmented_sort(table_view const& values, - table_view const& keys, - column_view const& segment_offsets, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
segmented_sort_by_key(table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(values.num_rows() == keys.num_rows(), "Mismatch in number of rows for values and keys"); @@ -167,12 +168,7 @@ std::unique_ptr
sort_lists(table_view const& values, // segment offsets from first list column auto lc = lists_column_view{keys.column(0)}; auto offset = lc.offsets(); - auto segment_offsets = column_view(offset.type(), - offset.size(), - offset.head(), - offset.null_mask(), - offset.null_count(), - offset.offset() + lc.offset()); + auto segment_offsets = cudf::detail::slice(offset, {lc.offset(), offset.size()}, stream)[0]; // child columns of values auto child_value_columns = thrust::make_transform_iterator( values.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); @@ -181,7 +177,7 @@ std::unique_ptr
sort_lists(table_view const& values, // Get segment sorted child columns of list columns auto child_result = - segmented_sort( + segmented_sort_by_key( child_values, child_keys, segment_offsets, column_order, null_precedence, stream, mr) ->release(); @@ -208,15 +204,15 @@ std::unique_ptr
sort_lists(table_view const& values, } } // namespace detail -std::unique_ptr
segmented_sort(table_view const& values, - table_view const& keys, - column_view const& segment_offsets, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
segmented_sort_by_key(table_view const& values, + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_sort( + return detail::segmented_sort_by_key( values, keys, segment_offsets, column_order, null_precedence, rmm::cuda_stream_default, mr); } std::unique_ptr
sort_lists(table_view const& values, diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index f25bc84ce9d..e907212c9e8 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -75,18 +75,18 @@ TEST_F(SegmentedSortInt, Empty) table_view table_empty{{col_empty}}; table_view table_valid{{col1}}; - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_valid, table_valid, segments)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_valid, table_valid, col_empty)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_empty, table_empty, segments)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_empty, table_empty, col_empty)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_valid, table_valid, segments)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_valid, table_valid, col_empty)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_empty, table_empty, segments)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_empty, table_empty, col_empty)); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_empty, table_valid, segments), + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_empty, table_valid, segments), "Mismatch in number of rows for values and keys"); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_empty, table_valid, col_empty), + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_empty, table_valid, col_empty), "Mismatch in number of rows for values and keys"); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_valid, table_empty, segments), + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_valid, table_empty, segments), "Mismatch in number of rows for values and keys"); - CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort(table_valid, table_empty, col_empty), + CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_valid, table_empty, col_empty), "Mismatch in number of rows for values and keys"); } @@ 
-99,10 +99,10 @@ TEST_F(SegmentedSortInt, Single) column_wrapper segments2{{0, 3}}; table_view table_1elem{{col1}}; table_view table_1segm{{col3}}; - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1elem, table_1elem, segments2)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1elem, table_1elem, segments1)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1segm, table_1segm, segments2)); - CUDF_EXPECT_NO_THROW(cudf::segmented_sort(table_1segm, table_1segm, segments1)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1elem, table_1elem, segments2)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1elem, table_1elem, segments1)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1segm, table_1segm, segments2)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1segm, table_1segm, segments1)); } TYPED_TEST(SegmentedSort, NoNull) @@ -120,11 +120,11 @@ TYPED_TEST(SegmentedSort, NoNull) // Ascending column_wrapper col1_asc{{10, 14, 36, 32, 49, 23, 10, 12, 12, 34, 45, 37, 43, 26, 16, 21}}; - auto results = cudf::segmented_sort(input1, input1, segments, {order::ASCENDING}); + auto results = cudf::segmented_sort_by_key(input1, input1, segments, {order::ASCENDING}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_asc}}); column_wrapper col1_des{{36, 14, 10, 49, 32, 23, 45, 34, 12, 12, 10, 43, 37, 26, 21, 16}}; - results = cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}); + results = cudf::segmented_sort_by_key(input1, input1, segments, {order::DESCENDING}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_des}}); column_wrapper col1_12_asc{{10, 14, 36, 32, 49, 23, 10, 12, 12, 34, 45, 37, 43, 26, 16, 21}}; @@ -132,11 +132,12 @@ TYPED_TEST(SegmentedSort, NoNull) column_wrapper col2_12_des{{10, 41, 63, 23, 94, 32, 10, 22, 21, 43, 54, 73, 34, 62, 61, 12}}; table_view expected12_aa{{col1_12_asc, col2_12_asc}}; - results = cudf::segmented_sort(input2, input2, segments, {}); + results = 
cudf::segmented_sort_by_key(input2, input2, segments, {}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_aa); table_view expected12_ad{{col1_12_asc, col2_12_des}}; - results = cudf::segmented_sort(input2, input2, segments, {order::ASCENDING, order::DESCENDING}); + results = + cudf::segmented_sort_by_key(input2, input2, segments, {order::ASCENDING, order::DESCENDING}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_ad); } @@ -160,9 +161,9 @@ TYPED_TEST(SegmentedSort, Null) column_wrapper col1_ab{{2, 1, 3, 4, 5, 23, 9, 6, 7, 7, 8, 37, 43, 26, 16, 21}, {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; - auto results = cudf::segmented_sort(input1, input1, segments, {}, {null_order::AFTER}); + auto results = cudf::segmented_sort_by_key(input1, input1, segments, {}, {null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_aa}}); - results = cudf::segmented_sort(input1, input1, segments, {}, {null_order::BEFORE}); + results = cudf::segmented_sort_by_key(input1, input1, segments, {}, {null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_ab}}); // Descending @@ -171,10 +172,10 @@ TYPED_TEST(SegmentedSort, Null) column_wrapper col1_db{{3, 1, 2, 5, 4, 23, 8, 7, 7, 6, 9, 43, 37, 26, 21, 16}, {1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}}; results = - cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}, {null_order::AFTER}); + cudf::segmented_sort_by_key(input1, input1, segments, {order::DESCENDING}, {null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_da}}); - results = - cudf::segmented_sort(input1, input1, segments, {order::DESCENDING}, {null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input1, input1, segments, {order::DESCENDING}, {null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), table_view{{col1_db}}); // second row null order. 
@@ -184,11 +185,11 @@ TYPED_TEST(SegmentedSort, Null) {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}}; table_view expected12_aa{{col1_aa, col2_12_aa}}; table_view expected12_ab{{col1_ab, col2_12_ab}}; - results = - cudf::segmented_sort(input2, input2, segments, {}, {null_order::AFTER, null_order::AFTER}); + results = cudf::segmented_sort_by_key( + input2, input2, segments, {}, {null_order::AFTER, null_order::AFTER}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_aa); - results = - cudf::segmented_sort(input2, input2, segments, {}, {null_order::BEFORE, null_order::BEFORE}); + results = cudf::segmented_sort_by_key( + input2, input2, segments, {}, {null_order::BEFORE, null_order::BEFORE}); CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected12_ab); } @@ -249,21 +250,22 @@ TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes) table_view input1{{col1}}; // Mismatch order sizes - EXPECT_THROW(cudf::segmented_sort(input1, input1, col2, {order::ASCENDING, order::ASCENDING}, {}), - logic_error); + EXPECT_THROW( + cudf::segmented_sort_by_key(input1, input1, col2, {order::ASCENDING, order::ASCENDING}, {}), + logic_error); // Mismatch null precedence sizes EXPECT_THROW( - cudf::segmented_sort(input1, input1, col2, {}, {null_order::AFTER, null_order::AFTER}), + cudf::segmented_sort_by_key(input1, input1, col2, {}, {null_order::AFTER, null_order::AFTER}), logic_error); // Both - EXPECT_THROW(cudf::segmented_sort(input1, - input1, - col2, - {order::ASCENDING, order::ASCENDING}, - {null_order::AFTER, null_order::AFTER}), + EXPECT_THROW(cudf::segmented_sort_by_key(input1, + input1, + col2, + {order::ASCENDING, order::ASCENDING}, + {null_order::AFTER, null_order::AFTER}), logic_error); // segmented_offsets beyond num_rows - undefined behaviour, no throw. 
- CUDF_EXPECT_NO_THROW(cudf::segmented_sort(input1, input1, col2)); + CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, col2)); } } // namespace test From df4913847f68e5337c4f3862393d45d13db70b35 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 2 Feb 2021 20:45:20 +0530 Subject: [PATCH 28/32] address review comments (codereport) --- cpp/src/sort/segmented_sort.cu | 43 +++++++++++++++++------------ cpp/tests/sort/sort_lists_tests.cpp | 10 +++---- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 2095a9cd3aa..919041fa7cf 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -39,6 +39,7 @@ #include #include #include +#include namespace cudf { namespace detail { @@ -64,22 +65,29 @@ rmm::device_uvector get_segment_indices(size_type num_rows, return std::move(segment_ids); } -void validate_list_columns(table_view const& keys, rmm::cuda_stream_view stream) +void validate_key_value_list_columns(table_view const& keys, + table_view const& values, + rmm::cuda_stream_view stream) { + std::vector key_value_columns; + key_value_columns.reserve(keys.num_columns() + values.num_columns()); + key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); + key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); + table_view key_vals{key_value_columns}; // check if all are list columns - CUDF_EXPECTS(std::all_of(keys.begin(), - keys.end(), + CUDF_EXPECTS(std::all_of(key_vals.begin(), + key_vals.end(), [](column_view const& col) { return col.type().id() == type_id::LIST; }), "segmented_sort_by_key only supports lists columns"); // check if all list sizes are equal. 
- auto table_device = table_device_view::create(keys, stream); + auto table_device = table_device_view::create(key_vals, stream); auto counting_iter = thrust::make_counting_iterator(0); CUDF_EXPECTS( thrust::all_of(rmm::exec_policy(stream), counting_iter, - counting_iter + keys.num_rows(), + counting_iter + key_vals.num_rows(), [d_keys = *table_device] __device__(size_type idx) { - auto size = list_size_functor{d_keys.column(0)}(idx); + auto const size = list_size_functor{d_keys.column(0)}(idx); return thrust::all_of( thrust::seq, d_keys.begin(), d_keys.end(), [&](auto const& d_column) { return list_size_functor{d_column}(idx) == size; @@ -108,13 +116,16 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, keys_with_segid.insert(keys_with_segid.end(), keys.begin(), keys.end()); auto segid_keys = table_view(keys_with_segid); - std::vector child_column_order(column_order); - if (not column_order.empty()) - child_column_order.insert(child_column_order.begin(), order::ASCENDING); - std::vector child_null_precedence(null_precedence); - if (not null_precedence.empty()) - child_null_precedence.insert(child_null_precedence.begin(), null_order::AFTER); - + auto prepend_default = [](auto const& vector, auto default_value) { + if (vector.empty()) return vector; + std::remove_cv_t> pre_vector; + pre_vector.reserve(pre_vector.size() + 1); + pre_vector.push_back(default_value); + pre_vector.insert(pre_vector.end(), vector.begin(), vector.end()); + return pre_vector; + }; + auto child_column_order = prepend_default(column_order, order::ASCENDING); + auto child_null_precedence = prepend_default(null_precedence, null_order::AFTER); // return sorted order of child columns return detail::sorted_order(segid_keys, child_column_order, child_null_precedence, stream, mr); } @@ -153,11 +164,7 @@ std::unique_ptr
sort_lists(table_view const& values, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(keys.num_columns() > 0, "keys table should have atleast one list column"); - std::vector key_value_columns; - key_value_columns.reserve(keys.num_columns() + values.num_columns()); - key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); - key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); - validate_list_columns(table_view{key_value_columns}, stream); + validate_key_value_list_columns(keys, values, stream); // child columns of keys auto child_key_columns = thrust::make_transform_iterator( diff --git a/cpp/tests/sort/sort_lists_tests.cpp b/cpp/tests/sort/sort_lists_tests.cpp index 6280d03884c..8cb7bc7665a 100644 --- a/cpp/tests/sort/sort_lists_tests.cpp +++ b/cpp/tests/sort/sort_lists_tests.cpp @@ -54,17 +54,17 @@ TEST_F(SortListsInt, ErrorsTableSizes) CUDF_EXPECT_NO_THROW(cudf::sort_lists(input1, input1, {}, {})); // Non-List keys CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input1, {}, {}), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); // Non-List values CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input2, {}, {}), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); // Both CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input2, {}, {}), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input3, {}, {}), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input3, input3, {}, {}), - "segmented_sort only supports lists columns"); + "segmented_sort_by_key only supports lists columns"); // List sizes mismatch key CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input5, input4, {}, {}), "size of each list in a row 
of table should be same"); From 1141485a8e68f226c62d1ba5583de65cedde80c4 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 3 Feb 2021 14:59:36 +0530 Subject: [PATCH 29/32] remove sort_lists(table_view) --- cpp/include/cudf/sorting.hpp | 28 ---- cpp/src/sort/segmented_sort.cu | 95 ------------ cpp/tests/CMakeLists.txt | 1 - cpp/tests/sort/sort_lists_tests.cpp | 214 ---------------------------- 4 files changed, 338 deletions(-) delete mode 100644 cpp/tests/sort/sort_lists_tests.cpp diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index a9627363afb..a481c44e042 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -214,33 +214,5 @@ std::unique_ptr
segmented_sort_by_key( std::vector const& null_precedence = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a lexicographic sort of lists in each row of a table. - * - * `keys` and `values` with list columns of depth 1 is only supported. - * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. - * @throws cudf::logic_error if any list sizes of corresponding row in each column are not equal. - * @throws cudf::logic_error if any column of `keys` or `values` is not a list column. - * - * @param values The table with list columns to reorder - * @param keys The table with list coumns that determines the ordering of elements in each list - * @param column_order The desired order for each column in `keys`. Size must be - * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in - * ascending order. - * @param null_precedence The desired order of a null element compared to other - * elements for each column in `keys`. Size must be equal to - * `keys.num_columns()` or empty. If empty, all columns will be sorted with - * `null_order::BEFORE`. - * @param mr Device memory resource to allocate any returned objects - * @return table with list columns with elements in each list sorted. - * - */ -std::unique_ptr
sort_lists( - table_view const& values, - table_view const& keys, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 919041fa7cf..f8b0e311c9c 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -65,37 +65,6 @@ rmm::device_uvector get_segment_indices(size_type num_rows, return std::move(segment_ids); } -void validate_key_value_list_columns(table_view const& keys, - table_view const& values, - rmm::cuda_stream_view stream) -{ - std::vector key_value_columns; - key_value_columns.reserve(keys.num_columns() + values.num_columns()); - key_value_columns.insert(key_value_columns.end(), keys.begin(), keys.end()); - key_value_columns.insert(key_value_columns.end(), values.begin(), values.end()); - table_view key_vals{key_value_columns}; - // check if all are list columns - CUDF_EXPECTS(std::all_of(key_vals.begin(), - key_vals.end(), - [](column_view const& col) { return col.type().id() == type_id::LIST; }), - "segmented_sort_by_key only supports lists columns"); - // check if all list sizes are equal. 
- auto table_device = table_device_view::create(key_vals, stream); - auto counting_iter = thrust::make_counting_iterator(0); - CUDF_EXPECTS( - thrust::all_of(rmm::exec_policy(stream), - counting_iter, - counting_iter + key_vals.num_rows(), - [d_keys = *table_device] __device__(size_type idx) { - auto const size = list_size_functor{d_keys.column(0)}(idx); - return thrust::all_of( - thrust::seq, d_keys.begin(), d_keys.end(), [&](auto const& d_column) { - return list_size_functor{d_column}(idx) == size; - }); - }), - "size of each list in a row of table should be same"); -} - std::unique_ptr segmented_sorted_order(table_view const& keys, column_view const& segment_offsets, std::vector const& column_order, @@ -155,60 +124,6 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, stream, mr); } - -std::unique_ptr
sort_lists(table_view const& values, - table_view const& keys, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(keys.num_columns() > 0, "keys table should have atleast one list column"); - validate_key_value_list_columns(keys, values, stream); - - // child columns of keys - auto child_key_columns = thrust::make_transform_iterator( - keys.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); - auto child_keys = - table_view{std::vector(child_key_columns, child_key_columns + keys.num_columns())}; - - // segment offsets from first list column - auto lc = lists_column_view{keys.column(0)}; - auto offset = lc.offsets(); - auto segment_offsets = cudf::detail::slice(offset, {lc.offset(), offset.size()}, stream)[0]; - // child columns of values - auto child_value_columns = thrust::make_transform_iterator( - values.begin(), [stream](auto col) { return lists_column_view(col).get_sliced_child(stream); }); - auto child_values = table_view{ - std::vector(child_value_columns, child_value_columns + values.num_columns())}; - - // Get segment sorted child columns of list columns - auto child_result = - segmented_sort_by_key( - child_values, child_keys, segment_offsets, column_order, null_precedence, stream, mr) - ->release(); - - // Construct list columns from gathered child columns & return - std::vector> list_columns; - std::transform(values.begin(), - values.end(), - std::make_move_iterator(child_result.begin()), - std::back_inserter(list_columns), - [&stream, &mr](auto& input_list, auto&& sorted_child) { - auto output_offset = - std::make_unique(lists_column_view(input_list).offsets(), stream, mr); - auto null_mask = cudf::detail::copy_bitmask(input_list, stream, mr); - // Assemble list column & return - return make_lists_column(input_list.size(), - std::move(output_offset), - std::move(sorted_child), - input_list.null_count(), - 
std::move(null_mask), - stream, - mr); - }); - return std::make_unique
(std::move(list_columns)); -} } // namespace detail std::unique_ptr
segmented_sort_by_key(table_view const& values, @@ -222,15 +137,5 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, return detail::segmented_sort_by_key( values, keys, segment_offsets, column_order, null_precedence, rmm::cuda_stream_default, mr); } -std::unique_ptr
sort_lists(table_view const& values, - table_view const& keys, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::sort_lists( - values, keys, column_order, null_precedence, rmm::cuda_stream_default, mr); -} } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 40db525501a..ccbc38f3696 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -363,7 +363,6 @@ ConfigureTest(JSON_TEST "${JSON_TEST_SRC}") set(SORT_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/sort/segmented_sort_tests.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_lists_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_test.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/sort/rank_test.cpp") diff --git a/cpp/tests/sort/sort_lists_tests.cpp b/cpp/tests/sort/sort_lists_tests.cpp deleted file mode 100644 index 8cb7bc7665a..00000000000 --- a/cpp/tests/sort/sort_lists_tests.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -#include - -#include -#include - -template -using LCW = cudf::test::lists_column_wrapper; - -namespace cudf { -namespace test { - -template -struct SortLists : public BaseFixture { -}; - -// using NumericTypesNotBool = Concat; -TYPED_TEST_CASE(SortLists, NumericTypes); - -using SortListsInt = SortLists; -TEST_F(SortListsInt, ErrorsTableSizes) -{ - LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; - fixed_width_column_wrapper col2{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; - strings_column_wrapper col3({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); - LCW col4{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9, 4}, {6, 7}}; - table_view input1{{col1}}; - table_view input2{{col1, col2}}; - table_view input3{{col2, col3}}; - table_view input4{{col4}}; - table_view input5{{col1, col4}}; - // Valid - CUDF_EXPECT_NO_THROW(cudf::sort_lists(input1, input1, {}, {})); - // Non-List keys - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input1, {}, {}), - "segmented_sort_by_key only supports lists columns"); - // Non-List values - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input2, {}, {}), - "segmented_sort_by_key only supports lists columns"); - // Both - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input2, {}, {}), - "segmented_sort_by_key only supports lists columns"); - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input2, input3, {}, {}), - "segmented_sort_by_key only supports lists columns"); - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input3, input3, {}, {}), - "segmented_sort_by_key only supports lists columns"); - // List sizes mismatch key - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input5, input4, {}, {}), - "size of each list in a row of table should be same"); - // List sizes mismatch value - CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input5, {}, {}), - "size of each list in a row of table should be same"); - // List sizes mismatch between key-value - 
CUDF_EXPECT_THROW_MESSAGE(cudf::sort_lists(input1, input4, {}, {}), - "size of each list in a row of table should be same"); -} - -TEST_F(SortListsInt, ErrorsMismatchArgSizes) -{ - LCW col1{{3, 1, 2}, {1}, {2}, {0}, {10, 9, 9}, {6, 7}}; - table_view input1{{col1}}; - - // Mismatch order sizes - EXPECT_THROW(cudf::sort_lists(input1, input1, {order::ASCENDING, order::ASCENDING}, {}), - logic_error); - // Mismatch null precedence sizes - EXPECT_THROW(cudf::sort_lists(input1, input1, {}, {null_order::AFTER, null_order::AFTER}), - logic_error); - // Both - EXPECT_THROW( - cudf::sort_lists( - input1, input1, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}), - logic_error); -} - -TYPED_TEST(SortLists, NoNull) -{ - using T = TypeParam; - - // List - LCW list1{{3, 2, 1, 4, 4, 4}, {5}, {9, 8, 9}, {6, 7}}; - LCW list2{{3, 1, 2, 3, 1, 2}, {0}, {10, 9, 9}, {6, 7}}; - table_view input{{list1, list2}}; - - // Ascending - // LCW order{{2, 1, 0, 4, 5, 3}, {0}, {1, 2, 0}, {0, 1}}; - LCW expected1{{1, 2, 3, 4, 4, 4}, {5}, {8, 9, 9}, {6, 7}}; - LCW expected2{{2, 1, 3, 1, 2, 3}, {0}, {9, 9, 10}, {6, 7}}; - table_view expected_table1{{expected1, expected2}}; - auto results = cudf::sort_lists( - input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - - results = cudf::sort_lists( - input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - - // Descending - // LCW order{{3, 5, 4, 0, 1, 2}, {0}, {0, 2, 1}, {1, 0}}; - LCW expected3{{4, 4, 4, 3, 2, 1}, {5}, {9, 9, 8}, {7, 6}}; - LCW expected4{{3, 2, 1, 3, 1, 2}, {0}, {10, 9, 9}, {7, 6}}; - table_view expected_table2{{expected3, expected4}}; - results = cudf::sort_lists( - input, input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); - 
CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); - - results = cudf::sort_lists( - input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); -} - -TYPED_TEST(SortLists, Nulls) -{ - using T = TypeParam; - if (std::is_same::value) return; - - // List - std::vector valids1{1, 1, 1, 0, 1, 1}; - std::vector valids1a{1, 1, 1, 1, 1, 0}; - std::vector valids2{1, 1, 0}; - std::vector valids2b{1, 0, 1}; - LCW list1{{{3, 2, 1, 4, 4, 4}, valids1.begin()}, {5}, {9, 8, 9}, {6, 7}}; - LCW list2{{3, 1, 2, 2, 1, 3}, {0}, {{10, 9, 9}, valids2.begin()}, {6, 7}}; - table_view input{{list1, list2}}; - // nulls = (4-NULL, 2), (9,9-NULL) - // (8,9), (9,10), (9,N) - - // Ascending - // LCW order{{2, 1, 0, 4, 5, 3}, {0}, {1, 0, 2}, {0, 1}}; - LCW expected1a{{{1, 2, 3, 4, 4, 4}, valids1a.begin()}, {5}, {8, 9, 9}, {6, 7}}; - LCW expected2a{{2, 1, 3, 1, 3, 2}, {0}, {{9, 10, 9}, valids2.begin()}, {6, 7}}; - table_view expected_table1a{{expected1a, expected2a}}; - auto results = cudf::sort_lists( - input, input, {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1a); - - // LCW order{{3, 2, 1, 0, 4, 5}, {0}, {2, 1, 0}, {0, 1}}; - LCW expected1b{{{4, 1, 2, 3, 4, 4}, valids1a.rbegin()}, {5}, {8, 9, 9}, {6, 7}}; - LCW expected2b{{2, 2, 1, 3, 1, 3}, {0}, {{9, 9, 10}, valids2b.begin()}, {6, 7}}; - table_view expected_table1b{{expected1b, expected2b}}; - results = cudf::sort_lists( - input, input, {order::ASCENDING, order::ASCENDING}, {null_order::BEFORE, null_order::BEFORE}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1b); - - // Descending - LCW expected3a{{{4, 4, 4, 3, 2, 1}, valids1a.rbegin()}, {5}, {9, 9, 8}, {7, 6}}; - LCW expected4a{{2, 3, 1, 3, 1, 2}, {0}, {{9, 10, 9}, valids2.rbegin()}, {7, 6}}; - table_view expected_table2a{{expected3a, expected4a}}; - results = 
cudf::sort_lists( - input, input, {order::DESCENDING, order::DESCENDING}, {null_order::AFTER, null_order::AFTER}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2a); - - LCW expected3b{{{4, 4, 3, 2, 1, 4}, valids1a.begin()}, {5}, {9, 9, 8}, {7, 6}}; - LCW expected4b{{3, 1, 3, 1, 2, 2}, {0}, {{10, 9, 9}, valids2b.begin()}, {7, 6}}; - table_view expected_table2b{{expected3b, expected4b}}; - results = cudf::sort_lists( - input, input, {order::DESCENDING, order::DESCENDING}, {null_order::BEFORE, null_order::BEFORE}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2b); -} - -TEST_F(SortListsInt, KeyValues) -{ - using T = int; - using LCWstr = cudf::test::lists_column_wrapper; - - // List - LCW a{{21, 22, 23, 22}, {22, 21, 23, 22}}; - LCW b{{13, 14, 12, 11}, {14, 13, 12, 11}}; - LCWstr c{{"a", "b", "c", "d"}, {"a", "b", "c", "d"}}; - - // Ascending {a} - // LCW order{{0, 1, 3, 2}, {1, 0, 3, 2}}; - LCW sorted_a1{{21, 22, 22, 23}, {21, 22, 22, 23}}; - LCW sorted_b1{{13, 14, 11, 12}, {13, 14, 11, 12}}; - LCWstr sorted_c1{{"a", "b", "d", "c"}, {"b", "a", "d", "c"}}; - auto results = cudf::sort_lists(table_view{{a, b, c}}, table_view{{a}}, {}, {}); - table_view expected_table1{{sorted_a1, sorted_b1, sorted_c1}}; - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table1); - - // Ascending {a,b} - // LCW order{{0, 3, 1, 2}, {1, 3, 0, 2}}; - LCW sorted_a2{{21, 22, 22, 23}, {21, 22, 22, 23}}; - LCW sorted_b2{{13, 11, 14, 12}, {13, 11, 14, 12}}; - LCWstr sorted_c2{{"a", "d", "b", "c"}, {"b", "d", "a", "c"}}; - table_view expected_table2{{sorted_a2, sorted_b2, sorted_c2}}; - table_view keys{{a, b}}; - table_view values{{a, b, c}}; - results = cudf::sort_lists(values, keys, {}, {}); - CUDF_TEST_EXPECT_TABLES_EQUAL(results->view(), expected_table2); -} - -} // namespace test -} // namespace cudf From 476420dacf362a0aeee3415c49dbf38de664f73b Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 3 Feb 2021 15:14:47 +0530 Subject: [PATCH 
30/32] add cudf::segmented_sorted_order in header --- cpp/include/cudf/detail/sorting.hpp | 37 ++++------------------------- cpp/include/cudf/sorting.hpp | 27 +++++++++++++++++++++ cpp/src/lists/segmented_sort.cu | 28 ++++++++++++++++------ 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index da185b296b2..11395287541 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -64,40 +64,10 @@ std::unique_ptr
sort_by_key( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::segmented_sort_by_key + * @copydoc cudf::segmented_sorted_order * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
segmented_sort_by_key( - table_view const& values, - table_view const& keys, - column_view const& segment_offsets, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns sorted order after sorting each segment in the table. - * - * If segment_offsets contains values larger than number of rows, behaviour is undefined. - * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. - * - * @param keys The table that determines the ordering of elements in each segment - * @param segment_offsets The column of `size_type` type containing start offset index for each - * contiguous segment. - * @param column_order The desired order for each column in `keys`. Size must be - * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in - * ascending order. - * @param null_precedence The desired order of a null element compared to other - * elements for each column in `keys`. Size must be equal to - * `keys.num_columns()` or empty. If empty, all columns will be sorted with - * `null_order::BEFORE`. - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to allocate any returned objects - * @return sorted order of the segment sorted table . - * - */ std::unique_ptr segmented_sorted_order( table_view const& keys, column_view const& segment_offsets, @@ -107,13 +77,14 @@ std::unique_ptr segmented_sorted_order( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::sort_lists + * @copydoc cudf::segmented_sort_by_key * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
sort_lists( +std::unique_ptr
segmented_sort_by_key( table_view const& values, table_view const& keys, + column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = rmm::cuda_stream_default, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index a481c44e042..028aa6d9b54 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -184,6 +184,33 @@ std::unique_ptr rank( bool percentage, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns sorted order after sorting each segment in the table. + * + * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. + * + * @param keys The table that determines the ordering of elements in each segment + * @param segment_offsets The column of `size_type` type containing start offset index for each + * contiguous segment. + * @param column_order The desired order for each column in `keys`. Size must be + * equal to `keys.num_columns()` or empty. If empty, all columns are sorted in + * ascending order. + * @param null_precedence The desired order of a null element compared to other + * elements for each column in `keys`. Size must be equal to + * `keys.num_columns()` or empty. If empty, all columns will be sorted with + * `null_order::BEFORE`. + * @param mr Device memory resource to allocate any returned objects + * @return sorted order of the segment sorted table . 
+ * + */ +std::unique_ptr segmented_sorted_order( + table_view const& keys, + column_view const& segment_offsets, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a lexicographic segmented sort of a table * diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index e62c7623365..989f1bb75f1 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,7 @@ namespace cudf { namespace lists { namespace detail { -struct SortPairs { +struct SegmentedSortColumn { template void SortPairsAscending(KeyT const* keys_in, KeyT* keys_out, @@ -127,17 +128,26 @@ struct SortPairs { sizeof(KeyT) * 8, stream.value()); } + template std::enable_if_t(), std::unique_ptr> operator()( column_view const& child, - column_view const& offsets, + column_view const& segment_offsets, order column_order, null_order null_precedence, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_FAIL("segmented sort is not supported for non-numeric list types"); + auto child_table = segmented_sort_by_key(table_view{{child}}, + table_view{{child}}, + segment_offsets, + {column_order}, + {null_precedence}, + stream, + mr); + return std::move(child_table->release().front()); } + template std::enable_if_t(), std::unique_ptr> operator()( column_view const& child, @@ -214,18 +224,22 @@ std::unique_ptr sort_lists(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { if (input.is_empty()) return empty_like(input.parent()); + auto segment_offsets = + cudf::detail::slice(input.offsets(), {input.offset(), input.offsets().size()}, stream)[0]; + // for numeric columns, calls Faster segmented radix sort path + // for non-numeric columns, calls segmented_sort_by_key. 
auto output_child = type_dispatcher(input.child().type(), - SortPairs{}, + SegmentedSortColumn{}, input.get_sliced_child(stream), - input.offsets(), + segment_offsets, column_order, null_precedence, stream, mr); - // Copy list offsets. - auto output_offset = std::make_unique(input.offsets(), stream, mr); + // Copy list offsets. // TODO check offset[0] value + auto output_offset = std::make_unique(segment_offsets, stream, mr); auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr); // Assemble list column & return From 8732fbb41e0b8b0a4bf6272cc65b812000477359 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 3 Feb 2021 17:52:50 +0530 Subject: [PATCH 31/32] update copyright year --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/include/cudf/detail/sorting.hpp | 2 +- cpp/include/cudf/lists/list_device_view.cuh | 2 +- cpp/include/cudf/sorting.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index b4e071169f1..a1953a2d358 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 11395287541..3127a5f89f1 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index b5752d7b401..81109bedaed 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 028aa6d9b54..2454cfe7c7b 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From e0696ef4af0e815f06af880cb4ffd9270aadb704 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 4 Feb 2021 00:36:27 +0530 Subject: [PATCH 32/32] add more tests, enable sliced list column --- cpp/src/lists/segmented_sort.cu | 16 ++++-- cpp/tests/lists/sort_lists_tests.cpp | 76 ++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 989f1bb75f1..5681f7767e0 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -226,21 +226,27 @@ std::unique_ptr sort_lists(lists_column_view const& input, if (input.is_empty()) return empty_like(input.parent()); auto segment_offsets = cudf::detail::slice(input.offsets(), {input.offset(), input.offsets().size()}, stream)[0]; - + // Copy list offsets. 
+ auto output_offset = allocate_like(segment_offsets, mask_allocation_policy::RETAIN, mr); + thrust::transform(rmm::exec_policy(stream), + segment_offsets.begin(), + segment_offsets.end(), + output_offset->mutable_view().begin(), + [first = segment_offsets.begin()] __device__(auto offset_index) { + return offset_index - *first; + }); // for numeric columns, calls Faster segmented radix sort path // for non-numeric columns, calls segmented_sort_by_key. auto output_child = type_dispatcher(input.child().type(), SegmentedSortColumn{}, input.get_sliced_child(stream), - segment_offsets, + output_offset->view(), column_order, null_precedence, stream, mr); - // Copy list offsets. // TODO check offset[0] value - auto output_offset = std::make_unique(segment_offsets, stream, mr); - auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr); + auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr); // Assemble list column & return return make_lists_column(input.size(), diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index 8e5dc7cb0f2..ac73297f088 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -45,7 +46,23 @@ struct SortLists : public BaseFixture { }; TYPED_TEST_CASE(SortLists, NumericTypes); +using SortListsInt = SortLists; +/* +empty case + empty list + single row with empty list + multi row with empty lists +single case + single list with single element + single list with multi element +normal case without nulls +Null cases + null rows + null elements in list. 
+Error: + depth>1 +*/ TYPED_TEST(SortLists, NoNull) { using T = TypeParam; @@ -102,5 +119,64 @@ TYPED_TEST(SortLists, Null) CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected4); } +TEST_F(SortListsInt, Empty) +{ + using T = int; + LCW l1{}; + LCW l2{LCW{}}; + LCW l3{LCW{}, LCW{}}; + + auto results = sort_lists(lists_column_view{l1}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l1); + results = sort_lists(lists_column_view{l2}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l2); + results = sort_lists(lists_column_view{l3}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l3); +} + +TEST_F(SortListsInt, Single) +{ + using T = int; + LCW l1{{1}}; + LCW l2{{1, 2, 3}}; + + auto results = sort_lists(lists_column_view{l1}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l1); + results = sort_lists(lists_column_view{l2}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l2); +} + +TEST_F(SortListsInt, NullRows) +{ + using T = int; + std::vector valids{0, 1, 0}; + LCW l1{{{1, 2, 3}, {4, 5, 6}, {7}}, valids.begin()}; // offset 0, 0, 3, 3 + + auto results = sort_lists(lists_column_view{l1}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), l1); +} + +/* +// Disabling this test. +// Reason: After this exception "cudaErrorAssert device-side assert triggered", further tests fail +TEST_F(SortListsInt, Depth) +{ + using T = int; + LCW l1{LCW{{1, 2}, {3}}, LCW{{4, 5}}}; + // device exception + EXPECT_THROW(sort_lists(lists_column_view{l1}, {}, {}), std::exception); +} +*/ + +TEST_F(SortListsInt, Sliced) +{ + using T = int; + LCW l1{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}}; + auto sliced_list = cudf::slice(l1, {1, 4})[0]; + + auto results = sort_lists(lists_column_view{sliced_list}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); +} + } // namespace test } // namespace cudf