Skip to content

Commit

Permalink
Remove UNKNOWN_NULL_COUNT where it can be easily computed (#13205)
Browse files Browse the repository at this point in the history
Contributes to #11968

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Divye Gala (https://github.com/divyegala)
  - Nghia Truong (https://github.com/ttnghia)

URL: #13205
  • Loading branch information
vyasr authored Apr 26, 2023
1 parent 8b59663 commit f4e0f19
Show file tree
Hide file tree
Showing 18 changed files with 142 additions and 89 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ struct column_gatherer_impl<struct_view> {
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(column.num_children()),
std::back_inserter(sliced_children),
[structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx);
[&stream, structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx, stream);
});

std::vector<std::unique_ptr<cudf::column>> output_struct_members;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct temporary_nullable_data {
* @return New column with concatenated results.
*/
std::vector<std::vector<column_view>> extract_ordered_struct_children(
host_span<column_view const> struct_cols);
host_span<column_view const> struct_cols, rmm::cuda_stream_view stream);

/**
* @brief Check whether the specified column is of type LIST, or any LISTs in its descendent
Expand Down
10 changes: 8 additions & 2 deletions cpp/include/cudf/structs/structs_column_view.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,9 @@

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>

/**
* @file
Expand Down Expand Up @@ -87,9 +90,12 @@ class structs_column_view : public column_view {
* @throw cudf::logic_error if this is an empty column
*
* @param index The index of the child column to return
* @param stream The stream on which to perform the operation. Uses the default CUDF
* stream if none is specified.
* @return The child column sliced relative to the parent's offset and size
*/
[[nodiscard]] column_view get_sliced_child(int index) const;
[[nodiscard]] column_view get_sliced_child(
int index, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
}; // class structs_column_view;
/** @} */ // end of group
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ void traverse_children::operator()<cudf::struct_view>(host_span<column_view cons
std::back_inserter(nth_children),
[child_index, stream](column_view const& col) {
structs_column_view scv(col);
return scv.get_sliced_child(child_index);
return scv.get_sliced_child(child_index, stream);
});

bounds_and_type_check(nth_children, stream);
Expand Down
30 changes: 20 additions & 10 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
InputIter end,
src_buf_info* head,
src_buf_info* current,
rmm::cuda_stream_view stream,
int offset_stack_pos = 0,
int parent_offset_index = -1,
int offset_depth = 0);
Expand All @@ -450,7 +451,8 @@ struct buf_info_functor {
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand Down Expand Up @@ -492,7 +494,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::string_vi
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand Down Expand Up @@ -549,7 +552,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::list_view
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
lists_column_view lcv(col);

Expand Down Expand Up @@ -588,6 +592,7 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::list_view
col.child_end(),
head,
current,
stream,
offset_stack_pos,
parent_offset_index,
offset_depth);
Expand All @@ -599,7 +604,8 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::struct_vi
src_buf_info* current,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
int offset_depth,
rmm::cuda_stream_view stream)
{
if (col.nullable()) {
std::tie(current, offset_stack_pos) =
Expand All @@ -617,14 +623,16 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::struct_vi
cudf::structs_column_view scv(col);
std::vector<column_view> sliced_children;
sliced_children.reserve(scv.num_children());
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(scv.num_children()),
std::back_inserter(sliced_children),
[&scv](size_type child_index) { return scv.get_sliced_child(child_index); });
std::transform(
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(scv.num_children()),
std::back_inserter(sliced_children),
[&scv, &stream](size_type child_index) { return scv.get_sliced_child(child_index, stream); });
return setup_source_buf_info(sliced_children.begin(),
sliced_children.end(),
head,
current,
stream,
offset_stack_pos,
parent_offset_index,
offset_depth);
Expand All @@ -635,6 +643,7 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
InputIter end,
src_buf_info* head,
src_buf_info* current,
rmm::cuda_stream_view stream,
int offset_stack_pos,
int parent_offset_index,
int offset_depth)
Expand All @@ -646,7 +655,8 @@ std::pair<src_buf_info*, size_type> setup_source_buf_info(InputIter begin,
current,
offset_stack_pos,
parent_offset_index,
offset_depth);
offset_depth,
stream);
});
return {current, offset_stack_pos};
}
Expand Down Expand Up @@ -1045,7 +1055,7 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
std::copy(splits.begin(), splits.end(), std::next(h_indices));

// setup source buf info
setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info);
setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info, stream);

// HtoD indices and source buf info to device
CUDF_CUDA_TRY(cudaMemcpyAsync(
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -562,8 +562,8 @@ struct column_to_strings_fn {
operator()(column_view const& column, host_span<column_name_info const> children_names) const
{
auto const child_it = cudf::detail::make_counting_transform_iterator(
0, [structs_view = structs_column_view{column}](auto const child_idx) {
return structs_view.get_sliced_child(child_idx);
0, [&stream = stream_, structs_view = structs_column_view{column}](auto const child_idx) {
return structs_view.get_sliced_child(child_idx, stream);
});
auto col_string = operator()(
child_it, child_it + column.num_children(), children_names, row_end_wrap.value(stream_));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ size_t column_size(column_view const& column, rmm::cuda_stream_view stream)
auto const scol = structs_column_view(column);
size_t ret = 0;
for (int i = 0; i < scol.num_children(); i++) {
ret += column_size(scol.get_sliced_child(i), stream);
ret += column_size(scol.get_sliced_child(i, stream), stream);
}
return ret;
} else if (column.type().id() == type_id::LIST) {
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/lists/copying/copying.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -81,10 +82,13 @@ std::unique_ptr<cudf::column> copy_slice(lists_column_view const& lists,
// Compute the null mask of the result:
auto null_mask = cudf::detail::copy_bitmask(lists.null_mask(), start, end, stream, mr);

auto null_count = cudf::detail::null_count(
static_cast<bitmask_type const*>(null_mask.data()), 0, end - start, stream);

return make_lists_column(lists_count,
std::move(offsets),
std::move(child),
cudf::UNKNOWN_NULL_COUNT,
null_count,
std::move(null_mask),
stream,
mr);
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,12 @@ std::unique_ptr<column> column_merger::operator()<cudf::struct_view>(

auto it = cudf::detail::make_counting_transform_iterator(
0, [&, merger = column_merger{row_order_}](size_type i) {
return cudf::type_dispatcher<dispatch_storage_type>(
lhs.child(i).type(), merger, lhs.get_sliced_child(i), rhs.get_sliced_child(i), stream, mr);
return cudf::type_dispatcher<dispatch_storage_type>(lhs.child(i).type(),
merger,
lhs.get_sliced_child(i, stream),
rhs.get_sliced_child(i, stream),
stream,
mr);
});

auto merged_children = std::vector<std::unique_ptr<column>>(it, it + lhs.num_children());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/reductions/scan/scan_inclusive.cu
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ struct scan_functor<Op, cudf::struct_view> {
// handle input in case it is a sliced view.
auto const input_children = [&] {
auto const it = cudf::detail::make_counting_transform_iterator(
0, [structs_view = structs_column_view{input}, stream](auto const child_idx) {
return structs_view.get_sliced_child(child_idx);
0, [structs_view = structs_column_view{input}, &stream](auto const child_idx) {
return structs_view.get_sliced_child(child_idx, stream);
});
return std::vector<column_view>(it, it + input.num_children());
}();
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ struct interleave_columns_impl<T, std::enable_if_t<std::is_same_v<T, cudf::struc
std::vector<std::unique_ptr<cudf::column>> output_struct_members;
for (size_type child_idx = 0; child_idx < num_children; ++child_idx) {
// Collect children columns from the input structs columns at index `child_idx`.
auto const child_iter =
thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) {
return structs_column_view(col).get_sliced_child(child_idx);
auto const child_iter = thrust::make_transform_iterator(
structs_columns.begin(), [&stream = stream, child_idx](auto const& col) {
return structs_column_view(col).get_sliced_child(child_idx, stream);
});
auto children = std::vector<column_view>(child_iter, child_iter + num_columns);

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/strings/copying/copying.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,10 +73,13 @@ std::unique_ptr<cudf::column> copy_slice(strings_column_view const& strings,
auto null_mask = cudf::detail::copy_bitmask(
strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr);

auto null_count = cudf::detail::null_count(
static_cast<bitmask_type const*>(null_mask.data()), 0, strings_count, stream);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
UNKNOWN_NULL_COUNT,
null_count,
std::move(null_mask));
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/structs/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
rmm::mr::device_memory_resource* mr)
{
// get ordered children
auto ordered_children = extract_ordered_struct_children(columns);
auto ordered_children = extract_ordered_struct_children(columns, stream);

// concatenate them
std::vector<std::unique_ptr<column>> children;
Expand Down
26 changes: 15 additions & 11 deletions cpp/src/structs/structs_column_view.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,9 @@
*/

#include <cudf/column/column.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

namespace cudf {
Expand All @@ -27,22 +29,24 @@ structs_column_view::structs_column_view(column_view const& rhs) : column_view{r

column_view structs_column_view::parent() const { return *this; }

column_view structs_column_view::get_sliced_child(int index) const
column_view structs_column_view::get_sliced_child(int index, rmm::cuda_stream_view stream) const
{
std::vector<column_view> children;
children.reserve(child(index).num_children());
for (size_type i = 0; i < child(index).num_children(); i++) {
children.push_back(child(index).child(i));
}
return column_view{child(index).type(),
size(),
child(index).head<uint8_t>(),
child(index).null_mask(),
// TODO: could potentially compute the actual count here, but at
// the moment this interface doesn't take a stream.
UNKNOWN_NULL_COUNT,
offset(),
children};

return column_view{
child(index).type(),
size(),
child(index).head<uint8_t>(),
child(index).null_mask(),
child(index).null_count()
? cudf::detail::null_count(child(index).null_mask(), offset(), offset() + size(), stream)
: 0,
offset(),
children};
}

} // namespace cudf
19 changes: 10 additions & 9 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace cudf::structs::detail {
* @copydoc cudf::structs::detail::extract_ordered_struct_children
*/
std::vector<std::vector<column_view>> extract_ordered_struct_children(
host_span<column_view const> struct_cols)
host_span<column_view const> struct_cols, rmm::cuda_stream_view stream)
{
auto const num_children = struct_cols[0].num_children();
auto const num_cols = static_cast<size_type>(struct_cols.size());
Expand All @@ -56,7 +56,7 @@ std::vector<std::vector<column_view>> extract_ordered_struct_children(
"Mismatch in number of children during struct concatenate");
CUDF_EXPECTS(struct_cols[0].child(child_index).type() == scv.child(child_index).type(),
"Mismatch in child types during struct concatenate");
children.push_back(scv.get_sliced_child(child_index));
children.push_back(scv.get_sliced_child(child_index, stream));
}

result.push_back(std::move(children));
Expand Down Expand Up @@ -161,7 +161,7 @@ struct table_flattener {
if (not null_precedence.empty()) { flat_null_precedence.push_back(col_null_order); }
}
for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) {
auto const& child = col.get_sliced_child(i);
auto const& child = col.get_sliced_child(i, stream);
if (child.type().id() == type_id::STRUCT) {
flatten_struct_column(structs_column_view{child}, col_order, col_null_order);
} else {
Expand Down Expand Up @@ -270,11 +270,12 @@ std::unique_ptr<column> superimpose_nulls_no_sanitize(bitmask_type const* null_m
auto content = input->release();

// Build new children columns.
std::for_each(
content.children.begin(), content.children.end(), [current_mask, stream, mr](auto& child) {
child = superimpose_nulls_no_sanitize(
current_mask, cudf::UNKNOWN_NULL_COUNT, std::move(child), stream, mr);
});
std::for_each(content.children.begin(),
content.children.end(),
[current_mask, new_null_count, stream, mr](auto& child) {
child = superimpose_nulls_no_sanitize(
current_mask, new_null_count, std::move(child), stream, mr);
});

// Replace the children columns.
return cudf::make_structs_column(num_rows,
Expand Down Expand Up @@ -304,7 +305,7 @@ std::pair<column_view, temporary_nullable_data> push_down_nulls_no_sanitize(

// Function to rewrite child null mask.
auto const child_with_new_mask = [&](auto const& child_idx) {
auto child = structs_view.get_sliced_child(child_idx);
auto child = structs_view.get_sliced_child(child_idx, stream);

// If struct is not nullable, child null mask is retained. NOOP.
if (not structs_view.nullable()) { return child; }
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ struct flatten_functor {

structs_column_view scv(col);
auto iter = cudf::detail::make_counting_transform_iterator(
0, [&scv](auto i) { return scv.get_sliced_child(i); });
0, [&scv, &stream](auto i) { return scv.get_sliced_child(i, stream); });
flatten_hierarchy(iter,
iter + scv.num_children(),
out,
Expand Down
Loading

0 comments on commit f4e0f19

Please sign in to comment.