Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove UNKNOWN_NULL_COUNT where it can be easily computed #13205

Merged
merged 9 commits on
Apr 26, 2023
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ struct column_gatherer_impl<struct_view> {
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(column.num_children()),
std::back_inserter(sliced_children),
[structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx);
[&stream, structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx, stream);
});

std::vector<std::unique_ptr<cudf::column>> output_struct_members;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct temporary_nullable_data {
* @return For each child index, a vector holding that child's sliced view from every input struct column.
*/
std::vector<std::vector<column_view>> extract_ordered_struct_children(
host_span<column_view const> struct_cols);
host_span<column_view const> struct_cols, rmm::cuda_stream_view stream);

/**
* @brief Check whether the specified column is of type LIST, or any LISTs in its descendent
Expand Down
10 changes: 8 additions & 2 deletions cpp/include/cudf/structs/structs_column_view.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,9 @@

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>

/**
* @file
Expand Down Expand Up @@ -87,9 +90,12 @@ class structs_column_view : public column_view {
* @throw cudf::logic_error if this is an empty column
*
* @param index The index of the child column to return
* @param stream The stream on which to perform the operation. Uses the default CUDF
* stream if none is specified.
* @return The child column sliced relative to the parent's offset and size
*/
[[nodiscard]] column_view get_sliced_child(int index) const;
[[nodiscard]] column_view get_sliced_child(
int index, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
vyasr marked this conversation as resolved.
Show resolved Hide resolved
}; // class structs_column_view;
/** @} */ // end of group
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ void traverse_children::operator()<cudf::struct_view>(host_span<column_view cons
std::back_inserter(nth_children),
[child_index, stream](column_view const& col) {
structs_column_view scv(col);
return scv.get_sliced_child(child_index);
return scv.get_sliced_child(child_index, stream);
});

bounds_and_type_check(nth_children, stream);
Expand Down
30 changes: 20 additions & 10 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
InputIter end,
src_buf_info* head,
src_buf_info* current,
rmm::cuda_stream_view stream,
int offset_stack_pos = 0,
int parent_offset_index = -1,
int offset_depth = 0);
Expand All @@ -449,7 +450,8 @@ struct buf_info_functor {
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand Down Expand Up @@ -491,7 +493,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::string_vi
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand Down Expand Up @@ -548,7 +551,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::list_view
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
lists_column_view lcv(col);

Expand Down Expand Up @@ -587,6 +591,7 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::list_view
col.child_end(),
head,
current,
stream,
offset_stack_pos,
parent_offset_index,
offset_depth);
Expand All @@ -598,7 +603,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::struct_vi
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand All @@ -616,14 +622,16 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::struct_vi
cudf::structs_column_view scv(col);
std::vector<column_view> sliced_children;
sliced_children.reserve(scv.num_children());
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(scv.num_children()),
std::back_inserter(sliced_children),
[&scv](size_type child_index) { return scv.get_sliced_child(child_index); });
std::transform(
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(scv.num_children()),
std::back_inserter(sliced_children),
[&scv, &stream](size_type child_index) { return scv.get_sliced_child(child_index, stream); });
return setup_source_buf_info(sliced_children.begin(),
sliced_children.end(),
head,
current,
stream,
offset_stack_pos,
parent_offset_index,
offset_depth);
Expand All @@ -634,6 +642,7 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
InputIter end,
src_buf_info* head,
src_buf_info* current,
rmm::cuda_stream_view stream,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
Expand All @@ -645,7 +654,8 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
current,
offset_stack_pos,
parent_offset_index,
offset_depth);
offset_depth,
stream);
});
return {current, offset_stack_pos};
}
Expand Down Expand Up @@ -1044,7 +1054,7 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
std::copy(splits.begin(), splits.end(), std::next(h_indices));

// setup source buf info
setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info);
setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info, stream);

// HtoD indices and source buf info to device
CUDF_CUDA_TRY(cudaMemcpyAsync(
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -562,8 +562,8 @@ struct column_to_strings_fn {
operator()(column_view const& column, host_span<column_name_info const> children_names) const
{
auto const child_it = cudf::detail::make_counting_transform_iterator(
0, [structs_view = structs_column_view{column}](auto const child_idx) {
return structs_view.get_sliced_child(child_idx);
0, [&stream = stream_, structs_view = structs_column_view{column}](auto const child_idx) {
return structs_view.get_sliced_child(child_idx, stream);
});
auto col_string = operator()(
child_it, child_it + column.num_children(), children_names, row_end_wrap.value(stream_));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ size_t column_size(column_view const& column, rmm::cuda_stream_view stream)
auto const scol = structs_column_view(column);
size_t ret = 0;
for (int i = 0; i < scol.num_children(); i++) {
ret += column_size(scol.get_sliced_child(i), stream);
ret += column_size(scol.get_sliced_child(i, stream), stream);
}
return ret;
} else if (column.type().id() == type_id::LIST) {
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/lists/copying/copying.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -81,10 +82,13 @@ std::unique_ptr<cudf::column> copy_slice(lists_column_view const& lists,
// Compute the null mask of the result:
auto null_mask = cudf::detail::copy_bitmask(lists.null_mask(), start, end, stream, mr);

auto null_count = cudf::detail::null_count(
static_cast<bitmask_type const*>(null_mask.data()), 0, end - start, stream);

return make_lists_column(lists_count,
std::move(offsets),
std::move(child),
cudf::UNKNOWN_NULL_COUNT,
null_count,
std::move(null_mask),
stream,
mr);
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,12 @@ std::unique_ptr<column> column_merger::operator()<cudf::struct_view>(

auto it = cudf::detail::make_counting_transform_iterator(
0, [&, merger = column_merger{row_order_}](size_type i) {
return cudf::type_dispatcher<dispatch_storage_type>(
lhs.child(i).type(), merger, lhs.get_sliced_child(i), rhs.get_sliced_child(i), stream, mr);
return cudf::type_dispatcher<dispatch_storage_type>(lhs.child(i).type(),
merger,
lhs.get_sliced_child(i, stream),
rhs.get_sliced_child(i, stream),
stream,
mr);
});

auto merged_children = std::vector<std::unique_ptr<column>>(it, it + lhs.num_children());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/reductions/scan/scan_inclusive.cu
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ struct scan_functor<Op, cudf::struct_view> {
// handle input in case it is a sliced view.
auto const input_children = [&] {
auto const it = cudf::detail::make_counting_transform_iterator(
0, [structs_view = structs_column_view{input}, stream](auto const child_idx) {
return structs_view.get_sliced_child(child_idx);
0, [structs_view = structs_column_view{input}, &stream](auto const child_idx) {
return structs_view.get_sliced_child(child_idx, stream);
});
return std::vector<column_view>(it, it + input.num_children());
}();
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ struct interleave_columns_impl<T, std::enable_if_t<std::is_same_v<T, cudf::struc
std::vector<std::unique_ptr<cudf::column>> output_struct_members;
for (size_type child_idx = 0; child_idx < num_children; ++child_idx) {
// Collect children columns from the input structs columns at index `child_idx`.
auto const child_iter =
thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) {
return structs_column_view(col).get_sliced_child(child_idx);
auto const child_iter = thrust::make_transform_iterator(
structs_columns.begin(), [&stream = stream, child_idx](auto const& col) {
return structs_column_view(col).get_sliced_child(child_idx, stream);
});
auto children = std::vector<column_view>(child_iter, child_iter + num_columns);

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/strings/copying/copying.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,10 +73,13 @@ std::unique_ptr<cudf::column> copy_slice(strings_column_view const& strings,
auto null_mask = cudf::detail::copy_bitmask(
strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr);

auto null_count = cudf::detail::null_count(
static_cast<bitmask_type const*>(null_mask.data()), 0, strings_count, stream);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
UNKNOWN_NULL_COUNT,
null_count,
std::move(null_mask));
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/structs/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
rmm::mr::device_memory_resource* mr)
{
// get ordered children
auto ordered_children = extract_ordered_struct_children(columns);
auto ordered_children = extract_ordered_struct_children(columns, stream);

// concatenate them
std::vector<std::unique_ptr<column>> children;
Expand Down
26 changes: 15 additions & 11 deletions cpp/src/structs/structs_column_view.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,9 @@
*/

#include <cudf/column/column.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

namespace cudf {
Expand All @@ -27,22 +29,24 @@ structs_column_view::structs_column_view(column_view const& rhs) : column_view{r

column_view structs_column_view::parent() const { return *this; }

column_view structs_column_view::get_sliced_child(int index) const
column_view structs_column_view::get_sliced_child(int index, rmm::cuda_stream_view stream) const
{
std::vector<column_view> children;
children.reserve(child(index).num_children());
for (size_type i = 0; i < child(index).num_children(); i++) {
children.push_back(child(index).child(i));
}
return column_view{child(index).type(),
size(),
child(index).head<uint8_t>(),
child(index).null_mask(),
// TODO: could potentially compute the actual count here, but at
// the moment this interface doesn't take a stream.
UNKNOWN_NULL_COUNT,
offset(),
children};

return column_view{
child(index).type(),
size(),
child(index).head<uint8_t>(),
child(index).null_mask(),
child(index).null_count()
? cudf::detail::null_count(child(index).null_mask(), offset(), offset() + size(), stream)
: 0,
offset(),
children};
}

} // namespace cudf
19 changes: 10 additions & 9 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace cudf::structs::detail {
* @copydoc cudf::structs::detail::extract_ordered_struct_children
*/
std::vector<std::vector<column_view>> extract_ordered_struct_children(
host_span<column_view const> struct_cols)
host_span<column_view const> struct_cols, rmm::cuda_stream_view stream)
{
auto const num_children = struct_cols[0].num_children();
auto const num_cols = static_cast<size_type>(struct_cols.size());
Expand All @@ -56,7 +56,7 @@ std::vector<std::vector<column_view>> extract_ordered_struct_children(
"Mismatch in number of children during struct concatenate");
CUDF_EXPECTS(struct_cols[0].child(child_index).type() == scv.child(child_index).type(),
"Mismatch in child types during struct concatenate");
children.push_back(scv.get_sliced_child(child_index));
children.push_back(scv.get_sliced_child(child_index, stream));
}

result.push_back(std::move(children));
Expand Down Expand Up @@ -161,7 +161,7 @@ struct table_flattener {
if (not null_precedence.empty()) { flat_null_precedence.push_back(col_null_order); }
}
for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) {
auto const& child = col.get_sliced_child(i);
auto const& child = col.get_sliced_child(i, stream);
if (child.type().id() == type_id::STRUCT) {
flatten_struct_column(structs_column_view{child}, col_order, col_null_order);
} else {
Expand Down Expand Up @@ -270,11 +270,12 @@ std::unique_ptr<column> superimpose_nulls_no_sanitize(bitmask_type const* null_m
auto content = input->release();

// Build new children columns.
std::for_each(
content.children.begin(), content.children.end(), [current_mask, stream, mr](auto& child) {
child = superimpose_nulls_no_sanitize(
current_mask, cudf::UNKNOWN_NULL_COUNT, std::move(child), stream, mr);
});
std::for_each(content.children.begin(),
content.children.end(),
[current_mask, new_null_count, stream, mr](auto& child) {
child = superimpose_nulls_no_sanitize(
current_mask, new_null_count, std::move(child), stream, mr);
});

// Replace the children columns.
return cudf::make_structs_column(num_rows,
Expand Down Expand Up @@ -304,7 +305,7 @@ std::pair<column_view, temporary_nullable_data> push_down_nulls_no_sanitize(

// Function to rewrite child null mask.
auto const child_with_new_mask = [&](auto const& child_idx) {
auto child = structs_view.get_sliced_child(child_idx);
auto child = structs_view.get_sliced_child(child_idx, stream);

// If struct is not nullable, child null mask is retained. NOOP.
if (not structs_view.nullable()) { return child; }
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ struct flatten_functor {

structs_column_view scv(col);
auto iter = cudf::detail::make_counting_transform_iterator(
0, [&scv](auto i) { return scv.get_sliced_child(i); });
0, [&scv, &stream](auto i) { return scv.get_sliced_child(i, stream); });
flatten_hierarchy(iter,
iter + scv.num_children(),
out,
Expand Down
Loading