Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Superimpose null masks for STRUCT columns. #9144

Merged
merged 7 commits into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/unary.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.hpp>
#include <structs/utilities.hpp>

#include <bitset>

namespace cudf {
namespace structs {
namespace detail {
Expand Down Expand Up @@ -337,6 +341,84 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
}
}

/**
 * Builds a view of `parent` in which the nulls of a STRUCT column are pushed down into its
 * children, recursively.
 *
 * - Non-STRUCT input: `parent` is returned as-is, together with an empty buffer list.
 * - STRUCT input that is not nullable: each child keeps its own null mask (but nested STRUCT
 *   children are still processed recursively).
 * - Nullable STRUCT input: a child without a null mask adopts the parent's mask pointer; a child
 *   with a null mask gets a freshly allocated mask holding (parent mask AND child mask).
 *
 * The returned column_view stores raw pointers into the returned device_buffers, so the caller
 * must keep those buffers alive for as long as the view (or any of its children) is used.
 *
 * @param parent The (possibly STRUCT) column whose nulls are pushed down to its members.
 * @param stream CUDA stream passed to `bitmask_and` and to the recursive calls.
 * @param mr     Device memory resource passed to `bitmask_and` and to the recursive calls.
 * @return Tuple of the rewritten column_view and the device_buffers backing any new null masks.
 */
std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
  column_view const& parent, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
  if (parent.type().id() != type_id::STRUCT) {
    // NOOP for non-STRUCT columns.
    return std::make_tuple(parent, std::vector<rmm::device_buffer>{});
  }

  auto structs_column = structs_column_view{parent};

  // Owns every null mask allocated here or by the recursive calls below.
  auto ret_validity_buffers = std::vector<rmm::device_buffer>{};

  // Returns the `child_idx`-th (sliced) child with the parent's nulls superimposed on it.
  // This has side effects (it may allocate and append to `ret_validity_buffers`), so it must be
  // called exactly once per child.
  auto rewrite_child_mask = [&](size_type child_idx) -> cudf::column_view {
    auto const child = structs_column.get_sliced_child(child_idx);

    // If struct is not nullable, child null mask is retained. NOOP.
    if (not structs_column.nullable()) { return child; }

    // Default: the child is not nullable, so it simply adopts the parent STRUCT's null mask.
    bitmask_type const* new_child_mask = structs_column.null_mask();

    if (child.nullable()) {
      // Both STRUCT and child are nullable. AND() for the child's new null mask.
      //
      // Note: ANDing only [offset(), offset()+size()) would not work. The null-mask produced thus
      // would start at offset=0. The column-view attempts to apply its offset() to both the _data
      // and the _null_mask(). It would be better to AND the bits from the beginning, and apply
      // offset() uniformly.
      // Alternatively, one could construct a big enough buffer, and use inplace_bitwise_and.
      //
      // NOTE(review): both masks are ANDed from bit 0, and the result is read at child.offset().
      // This presumably relies on the sliced child's offset matching the parent's offset; confirm.
      auto parent_child_null_masks =
        std::vector<cudf::bitmask_type const*>{structs_column.null_mask(), child.null_mask()};

      ret_validity_buffers.push_back(cudf::detail::bitmask_and(parent_child_null_masks,
                                                               std::vector<size_type>{0, 0},
                                                               child.offset() + child.size(),
                                                               stream,
                                                               mr));

      // NOTE(review): this pointer is kept while `ret_validity_buffers` may still grow; assumes
      // moving an rmm::device_buffer preserves its device pointer. Confirm against rmm docs.
      new_child_mask = static_cast<bitmask_type const*>(ret_validity_buffers.back().data());
    }

    return cudf::column_view(
      child.type(),
      child.size(),
      child.head(),
      new_child_mask,
      cudf::UNKNOWN_NULL_COUNT,
      child.offset(),
      std::vector<cudf::column_view>{child.child_begin(), child.child_end()});
  };

  auto const num_children = structs_column.num_children();

  auto ret_children = std::vector<cudf::column_view>{};
  ret_children.reserve(num_children);

  // Explicit loop (rather than a lazily-evaluated transform iterator) guarantees that the
  // side-effecting `rewrite_child_mask` runs once, and only once, for each child.
  for (size_type child_idx = 0; child_idx < num_children; ++child_idx) {
    // Recurse, so that nested STRUCT children push the (updated) mask further down.
    auto [processed_child, backing_buffers] =
      superimpose_parent_nulls(rewrite_child_mask(child_idx), stream, mr);

    ret_children.push_back(processed_child);
    ret_validity_buffers.insert(ret_validity_buffers.end(),
                                std::make_move_iterator(backing_buffers.begin()),
                                std::make_move_iterator(backing_buffers.end()));
  }

  // Make column view out of newly constructed column_views, and all the validity buffers.
  // The parent's own null mask, null count and offset are carried over unchanged.
  return std::make_tuple(column_view(parent.type(),
                                     parent.size(),
                                     nullptr,
                                     parent.null_mask(),
                                     parent.null_count(),  // Alternatively, postpone.
                                     parent.offset(),
                                     ret_children),
                         std::move(ret_validity_buffers));
}

} // namespace detail
} // namespace structs
} // namespace cudf
27 changes: 26 additions & 1 deletion cpp/src/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
mythrocks marked this conversation as resolved.
Show resolved Hide resolved

namespace cudf {
namespace structs {
namespace detail {
Expand Down Expand Up @@ -106,7 +108,7 @@ std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::tabl
table_view const& blueprint);

/**
* @brief Pushdown nulls from a parent mask into a child column, using AND.
* @brief Push down nulls from a parent mask into a child column, using bitwise AND.
*
* This function will recurse through all struct descendants. It is expected that
* the size of `parent_null_mask` in bits is the same as `child.size()`
Expand All @@ -123,6 +125,29 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Push down nulls from a parent column into its child columns, using bitwise AND.
*
* This function constructs a new column_view instance equivalent to the argument column_view,
* with possibly new child column_views, all with possibly new null mask values reflecting
* null rows from the parent column:
* 1. If the specified column is not STRUCT, the column is returned unmodified, with no new
*    supporting device_buffer instances.
* 2. If the column is STRUCT, the null masks of the parent and child are bitwise-ANDed, and a
*    modified column_view is returned. This applies recursively.
*
* The returned column_view does not own the newly constructed null masks; it points into the
* returned device_buffer instances. The caller must keep those buffers alive for as long as
* the returned column_view is in use.
*
* @param parent The parent (possibly STRUCT) column whose nulls need to be pushed to its members.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate new device memory.
* @return A tuple of:
*         1. column_view with nulls pushed down to child columns, as appropriate.
*         2. Supporting device_buffer instances, for any newly constructed null masks.
*/
std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
column_view const& parent,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace structs
} // namespace cudf
Loading