Skip to content

Commit

Permalink
Fix cudf::lists::sort_lists failing for sliced column (rapidsai#7564)
Browse files Browse the repository at this point in the history
This fixes rapidsai#7530 (`cudf::lists::sort_lists` fails for sliced column). 

I also added more tests for sliced columns to cover the previously failed cases, and added a header `lists/detail/sorting.cuh` to expose the internal `detail::sort_lists` API which accepts a stream parameter.

Authors:
  - Nghia Truong (@ttnghia)

Approvers:
  - David (@davidwendt)
  - AJ Schmidt (@ajschmidt8)
  - Karthikeyan (@karthikeyann)

URL: rapidsai#7564
  • Loading branch information
ttnghia authored and hyperbolic2346 committed Mar 23, 2021
1 parent d0f6c3c commit b7dd2cd
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 11 deletions.
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ test:
- test -f $PREFIX/include/cudf/join.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
- test -f $PREFIX/include/cudf/lists/count_elements.hpp
- test -f $PREFIX/include/cudf/lists/extract.hpp
- test -f $PREFIX/include/cudf/lists/contains.hpp
Expand Down
39 changes: 39 additions & 0 deletions cpp/include/cudf/lists/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/lists_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::sort_lists
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> sort_lists(
lists_column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
} // namespace detail
} // namespace lists
} // namespace cudf
13 changes: 6 additions & 7 deletions cpp/src/lists/segmented_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ struct SegmentedSortColumn {
auto const null_replace_T = null_precedence == null_order::AFTER
? std::numeric_limits<T>::max()
: std::numeric_limits<T>::min();

auto device_child = column_device_view::create(child, stream);
auto keys_in =
cudf::detail::make_null_replacement_iterator<T>(*device_child, null_replace_T);
Expand Down Expand Up @@ -224,15 +225,13 @@ std::unique_ptr<column> sort_lists(lists_column_view const& input,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) return empty_like(input.parent());
auto segment_offsets =
cudf::detail::slice(input.offsets(), {input.offset(), input.offsets().size()}, stream)[0];
// Copy list offsets.
auto output_offset = allocate_like(segment_offsets, mask_allocation_policy::RETAIN, mr);
auto output_offset = make_numeric_column(
input.offsets().type(), input.size() + 1, mask_state::UNALLOCATED, stream, mr);
thrust::transform(rmm::exec_policy(stream),
segment_offsets.begin<size_type>(),
segment_offsets.end<size_type>(),
input.offsets_begin(),
input.offsets_end(),
output_offset->mutable_view().begin<size_type>(),
[first = segment_offsets.begin<size_type>()] __device__(auto offset_index) {
[first = input.offsets_begin()] __device__(auto offset_index) {
return offset_index - *first;
});
// for numeric columns, calls Faster segmented radix sort path
Expand Down
20 changes: 16 additions & 4 deletions cpp/tests/lists/sort_lists_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,23 @@ TEST_F(SortListsInt, Depth)
TEST_F(SortListsInt, Sliced)
{
using T = int;
LCW<T> l1{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}};
auto sliced_list = cudf::slice(l1, {1, 4})[0];
LCW<T> l1{{3, 2, 1, 4}, {7, 5, 6}, {8, 9}, {10}};

auto results = sort_lists(lists_column_view{sliced_list}, {}, {});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), sliced_list);
auto sliced_list = cudf::slice(l1, {0, 4})[0];
auto results = sort_lists(lists_column_view{sliced_list}, {}, {});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW<T>{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}});

sliced_list = cudf::slice(l1, {1, 4})[0];
results = sort_lists(lists_column_view{sliced_list}, {}, {});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW<T>{{5, 6, 7}, {8, 9}, {10}});

sliced_list = cudf::slice(l1, {1, 2})[0];
results = sort_lists(lists_column_view{sliced_list}, {}, {});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW<T>{{5, 6, 7}});

sliced_list = cudf::slice(l1, {0, 2})[0];
results = sort_lists(lists_column_view{sliced_list}, {}, {});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), LCW<T>{{1, 2, 3, 4}, {5, 6, 7}});
}

} // namespace test
Expand Down

0 comments on commit b7dd2cd

Please sign in to comment.