Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute null-count in cudf::detail::slice #13124

Merged
merged 4 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 9 additions & 26 deletions cpp/include/cudf/detail/copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,36 +39,19 @@ namespace detail {
* @throws cudf::logic_error if `begin < 0`, `end < begin` or
* `end > input.size()`.
*
* @param[in] input View of input column to slice
* @param[in] begin Index of the first desired element in the slice (inclusive).
* @param[in] end Index of the last desired element in the slice (exclusive).
* @tparam ColumnView Must be either cudf::column_view or cudf::mutable_column_view
* @param input View of input column to slice
* @param begin Index of the first desired element in the slice (inclusive).
* @param end Index of the last desired element in the slice (exclusive).
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return ColumnView View of the elements `[begin,end)` from `input`.
*/
template <typename ColumnView>
ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type end)
{
// Only the two view types are accepted; any other ColumnView fails to compile.
static_assert(std::is_same_v<ColumnView, cudf::column_view> or
std::is_same_v<ColumnView, cudf::mutable_column_view>,
"slice can be performed only on column_view and mutable_column_view");
// Validate that [begin, end) is a well-formed range inside `input`.
CUDF_EXPECTS(begin >= 0, "Invalid beginning of range.");
CUDF_EXPECTS(end >= begin, "Invalid end of range.");
CUDF_EXPECTS(end <= input.size(), "Slice range out of bounds.");

// Child views are carried over unchanged; only the parent's size and offset
// are adjusted below.
std::vector<ColumnView> children{};
children.reserve(input.num_children());
for (size_type index = 0; index < input.num_children(); index++) {
children.emplace_back(input.child(index));
}

// The new view shares the input's data and null-mask pointers. Its null count
// is left as UNKNOWN_NULL_COUNT rather than being computed here (this overload
// has no stream parameter to compute it with).
return ColumnView(input.type(),
end - begin,
input.head(),
input.null_mask(),
cudf::UNKNOWN_NULL_COUNT,
input.offset() + begin,
children);
}
ColumnView slice(ColumnView const& input,
size_type begin,
size_type end,
rmm::cuda_stream_view stream);

/**
* @copydoc cudf::slice(column_view const&, host_span<size_type const>)
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/column/column.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -261,7 +261,7 @@ struct create_column_from_view {
std::back_inserter(children),
[begin, end, stream = this->stream, mr = this->mr](auto child) {
return std::make_unique<column>(
cudf::detail::slice(child, begin, end), stream, mr);
cudf::detail::slice(child, begin, end, stream), stream, mr);
});

auto num_rows = view.size();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/get_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ struct get_element_functor {
{
bool valid = is_element_valid_sync(input, index, stream);
auto row_contents =
std::make_unique<column>(slice(input, index, index + 1), stream, mr)->release();
std::make_unique<column>(slice(input, index, index + 1, stream), stream, mr)->release();
auto scalar_contents = table(std::move(row_contents.children));
return std::make_unique<struct_scalar>(std::move(scalar_contents), valid, stream, mr);
}
Expand Down
39 changes: 38 additions & 1 deletion cpp/src/copying/slice.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/copy.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
Expand All @@ -31,6 +32,42 @@

namespace cudf {
namespace detail {

template <typename ColumnView>
ColumnView slice(ColumnView const& input,
size_type begin,
size_type end,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(begin >= 0, "Invalid beginning of range.");
CUDF_EXPECTS(end >= begin, "Invalid end of range.");
CUDF_EXPECTS(end <= input.size(), "Slice range out of bounds.");

std::vector<ColumnView> children{};
children.reserve(input.num_children());
for (size_type index = 0; index < input.num_children(); index++) {
children.emplace_back(input.child(index));
}

return ColumnView(
input.type(),
end - begin,
input.head(),
input.null_mask(),
input.null_count() ? cudf::detail::null_count(input.null_mask(), begin, end, stream) : 0,
input.offset() + begin,
children);
}

// Explicit instantiations for the two supported view types. The template is
// only declared in cudf/detail/copy.hpp and defined in this source file, so
// these provide the definitions that callers in other translation units link to.
template column_view slice<column_view>(column_view const&,
size_type,
size_type,
rmm::cuda_stream_view);
template mutable_column_view slice<mutable_column_view>(mutable_column_view const&,
size_type,
size_type,
rmm::cuda_stream_view);

std::vector<column_view> slice(column_view const& input,
host_span<size_type const> indices,
rmm::cuda_stream_view stream)
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/sort_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ size_type sort_groupby_helper::num_keys(rmm::cuda_stream_view stream)
column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream)
{
auto sliced_key_sorted_order = [stream, this]() {
return cudf::detail::slice(this->_key_sorted_order->view(), 0, this->num_keys(stream));
return cudf::detail::slice(this->_key_sorted_order->view(), 0, this->num_keys(stream), stream);
};

if (_key_sorted_order) { return sliced_key_sorted_order(); }
Expand Down Expand Up @@ -261,7 +261,8 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values(
mr);

// Zero-copy slice this sort order so that its new size is num_keys()
column_view gather_map = cudf::detail::slice(values_sort_order->view(), 0, num_keys(stream));
column_view gather_map =
cudf::detail::slice(values_sort_order->view(), 0, num_keys(stream), stream);

auto sorted_values_table = cudf::detail::gather(table_view({values}),
gather_map,
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/interop/from_arrow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::string_view>(
: std::make_unique<column>(
cudf::detail::slice(out_col->view(),
static_cast<size_type>(array.offset()),
static_cast<size_type>(array.offset() + array.length())),
static_cast<size_type>(array.offset() + array.length()),
stream),
stream,
mr);
}
Expand Down Expand Up @@ -391,7 +392,8 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::list_view>(
: std::make_unique<column>(
cudf::detail::slice(out_col->view(),
static_cast<size_type>(array.offset()),
static_cast<size_type>(array.offset() + array.length())),
static_cast<size_type>(array.offset() + array.length()),
stream),
stream,
mr);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/stream_compaction/unique.cu
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ std::unique_ptr<table> unique(table_view const& input,
return static_cast<size_type>(thrust::distance(mutable_view->begin<size_type>(), result_end));
}
}();
auto indices_view = cudf::detail::slice(column_view(*unique_indices), 0, unique_size);
auto indices_view = cudf::detail::slice(column_view(*unique_indices), 0, unique_size, stream);

// gather unique rows and return
return detail::gather(input,
Expand Down
13 changes: 7 additions & 6 deletions cpp/tests/transform/mask_to_bools_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,14 +14,15 @@
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/copying.hpp>
#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>

// Test fixture for the mask_to_bools tests; adds no state or setup of its own
// beyond what cudf::test::BaseFixture provides.
struct MaskToBools : public cudf::test::BaseFixture {
};
Expand Down Expand Up @@ -61,7 +62,7 @@ TEST_P(MaskToBoolsTest, LargeDataSizeTest)
data.cbegin(), data.cend(), data.begin(), [](auto val) { return rand() % 2 == 0; });

auto col = cudf::test::fixed_width_column_wrapper<bool>(data.begin(), data.end());
auto expected = cudf::detail::slice(static_cast<cudf::column_view>(col), begin_bit, end_bit);
auto expected = cudf::slice(static_cast<cudf::column_view>(col), {begin_bit, end_bit}).front();

auto mask = cudf::bools_to_mask(col);

Expand Down
6 changes: 3 additions & 3 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

#include <cudf/column/column_view.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
Expand Down Expand Up @@ -513,8 +513,8 @@ std::string stringify_column_differences(cudf::device_span<int const> difference
cudf::detail::get_value<size_type>(lhs_row_indices, index, cudf::get_default_stream());
auto const rhs_index =
cudf::detail::get_value<size_type>(rhs_row_indices, index, cudf::get_default_stream());
auto diff_lhs = cudf::detail::slice(lhs, lhs_index, lhs_index + 1);
auto diff_rhs = cudf::detail::slice(rhs, rhs_index, rhs_index + 1);
auto diff_lhs = cudf::slice(lhs, {lhs_index, lhs_index + 1}).front();
auto diff_rhs = cudf::slice(rhs, {rhs_index, rhs_index + 1}).front();
return depth_str + "first difference: " + "lhs[" + std::to_string(index) +
"] = " + to_string(diff_lhs, "") + ", rhs[" + std::to_string(index) +
"] = " + to_string(diff_rhs, "");
Expand Down