From 2a3e43dc1858c8e3511bf226c400b06a9b665bfc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 10 Mar 2021 16:19:30 -0700 Subject: [PATCH 1/8] Add new sliced list for sort_lists unit tests --- cpp/tests/lists/sort_lists_tests.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index ac73297f088..5f7226dc843 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -172,9 +172,21 @@ TEST_F(SortListsInt, Sliced) { using T = int; LCW l1{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}}; - auto sliced_list = cudf::slice(l1, {1, 4})[0]; - auto results = sort_lists(lists_column_view{sliced_list}, {}, {}); + auto sliced_list = cudf::slice(l1, {0, 4})[0]; + auto results = sort_lists(lists_column_view{sliced_list}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + + sliced_list = cudf::slice(l1, {1, 4})[0]; + results = sort_lists(lists_column_view{sliced_list}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + + sliced_list = cudf::slice(l1, {1, 2})[0]; + results = sort_lists(lists_column_view{sliced_list}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + + sliced_list = cudf::slice(l1, {0, 2})[0]; + results = sort_lists(lists_column_view{sliced_list}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); } From 7dec216d836b96e0d1e53ca832d98074dcc75a2d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 10 Mar 2021 16:47:34 -0700 Subject: [PATCH 2/8] Fix segmented_sort (sort_lists) when the list is a sliced list --- cpp/src/lists/segmented_sort.cu | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 5681f7767e0..f2720a53218 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -167,7 +167,7 @@ struct SegmentedSortColumn { auto const null_replace_T = null_precedence == null_order::AFTER ? std::numeric_limits::max() : std::numeric_limits::min(); - auto device_child = column_device_view::create(child, stream); + auto device_child = column_device_view::create(child, stream); auto keys_in = cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); thrust::copy_n(rmm::exec_policy(stream), keys_in, child.size(), keys.begin()); @@ -224,15 +224,13 @@ std::unique_ptr sort_lists(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { if (input.is_empty()) return empty_like(input.parent()); - auto segment_offsets = - cudf::detail::slice(input.offsets(), {input.offset(), input.offsets().size()}, stream)[0]; - // Copy list offsets. - auto output_offset = allocate_like(segment_offsets, mask_allocation_policy::RETAIN, mr); + auto output_offset = make_numeric_column( + input.offsets().type(), input.size() + 1, mask_state::UNALLOCATED, stream, mr); thrust::transform(rmm::exec_policy(stream), - segment_offsets.begin(), - segment_offsets.end(), + input.offsets_begin(), + input.offsets_end(), output_offset->mutable_view().begin(), - [first = segment_offsets.begin()] __device__(auto offset_index) { + [first = input.offsets_begin()] __device__(auto offset_index) { return offset_index - *first; }); // for numeric columns, calls Faster segmented radix sort path From af888b861993b8ea23849636fba89b1a5c7e111c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 10 Mar 2021 16:53:51 -0700 Subject: [PATCH 3/8] Fix the sliced list tests for sort_lists --- cpp/tests/lists/sort_lists_tests.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index 5f7226dc843..28fb29c7d3c 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -171,23 +171,23 @@ TEST_F(SortListsInt, Depth) TEST_F(SortListsInt, Sliced) { using T = int; - LCW l1{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}}; + LCW l1{{3, 2, 1, 4}, {7, 5, 6}, {8, 9}, {10}}; auto sliced_list = cudf::slice(l1, {0, 4})[0]; auto results = sort_lists(lists_column_view{sliced_list}, {}, {}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}}); sliced_list = cudf::slice(l1, {1, 4})[0]; results = sort_lists(lists_column_view{sliced_list}, {}, {}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW{{5, 6, 7}, {8, 9}, {10}}); sliced_list = cudf::slice(l1, {1, 2})[0]; results = sort_lists(lists_column_view{sliced_list}, {}, {}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW{{5, 6, 7}}); sliced_list = cudf::slice(l1, {0, 2})[0]; results = sort_lists(lists_column_view{sliced_list}, {}, {}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW{{1, 2, 3, 4}, {5, 6, 7}}); } } // namespace test From fbb891889836b9690fb5d80e3f5d61d262ef18f6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 10 Mar 2021 17:07:18 -0700 Subject: [PATCH 4/8] Add lists/detail/sorting.cuh to expose the detail API of sort_lists which accepts stream parameter. --- cpp/include/cudf/lists/detail/sorting.cuh | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 cpp/include/cudf/lists/detail/sorting.cuh diff --git a/cpp/include/cudf/lists/detail/sorting.cuh b/cpp/include/cudf/lists/detail/sorting.cuh new file mode 100644 index 00000000000..51f612593e5 --- /dev/null +++ b/cpp/include/cudf/lists/detail/sorting.cuh @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace cudf { +namespace lists { +namespace detail { + +/** + * @copydoc cudf::lists::sort_lists + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr sort_lists(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); +} // namespace detail +} // namespace lists +} // namespace cudf From e24d5979f1e969b07a1e5d38d177967f75cd7957 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 10 Mar 2021 17:15:17 -0700 Subject: [PATCH 5/8] Fix style format --- cpp/src/lists/segmented_sort.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index f2720a53218..3bbbc9b16b7 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -167,7 +167,8 @@ struct SegmentedSortColumn { auto const null_replace_T = null_precedence == null_order::AFTER ? std::numeric_limits::max() : std::numeric_limits::min(); - auto device_child = column_device_view::create(child, stream); + + auto device_child = column_device_view::create(child, stream); auto keys_in = cudf::detail::make_null_replacement_iterator(*device_child, null_replace_T); thrust::copy_n(rmm::exec_policy(stream), keys_in, child.size(), keys.begin()); From 9a97902351bfac4a1a4d7cb8e224d065421670bc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 Mar 2021 06:35:21 -0700 Subject: [PATCH 6/8] Address review comments: Change lists/detail/sorting.cuh to lists/detail/sorting.hpp --- cpp/include/cudf/lists/detail/{sorting.cuh => sorting.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/include/cudf/lists/detail/{sorting.cuh => sorting.hpp} (100%) diff --git a/cpp/include/cudf/lists/detail/sorting.cuh b/cpp/include/cudf/lists/detail/sorting.hpp similarity index 100% rename from cpp/include/cudf/lists/detail/sorting.cuh rename to cpp/include/cudf/lists/detail/sorting.hpp From bdafeb8a281737537d1d92fa6abf76e31d88bd1f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 Mar 2021 06:46:38 -0700 Subject: [PATCH 7/8] Update conda recipes --- conda/recipes/libcudf/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index a46712def28..b23977086d3 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -132,6 +132,7 @@ test: - test -f $PREFIX/include/cudf/join.hpp - test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp - test -f $PREFIX/include/cudf/lists/detail/copying.hpp + - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp - test -f $PREFIX/include/cudf/lists/extract.hpp - test -f $PREFIX/include/cudf/lists/contains.hpp From b57702f2bd0d51bd032ecd28d0eab59c101052e7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 Mar 2021 07:35:24 -0700 Subject: [PATCH 8/8] Address review comments: Default mr parameter for detail::sort_lists --- cpp/include/cudf/lists/detail/sorting.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/lists/detail/sorting.hpp b/cpp/include/cudf/lists/detail/sorting.hpp index 51f612593e5..f68ff872020 100644 --- a/cpp/include/cudf/lists/detail/sorting.hpp +++ b/cpp/include/cudf/lists/detail/sorting.hpp @@ -28,11 +28,12 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sort_lists(lists_column_view const& input, - order column_order, - null_order null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr sort_lists( + lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail } // namespace lists } // namespace cudf