Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate unflatten_nested_columns #11421

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns(
std::vector<null_order> const& null_precedence,
column_nullability nullability = column_nullability::MATCH_INCOMING);

/**
 * @brief Unflatten columns flattened as by `flatten_nested_columns()`,
 * based on the provided `blueprint`.
 *
 * cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
 * before the child/member columns.
 * E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
 *      1. Null Vector for STRUCT_1
 *      2. Null Vector for STRUCT_2
 *      3. Member STRUCT_2::A
 *      4. Member STRUCT_2::B
 *      5. Member STRUCT_1::C
 *
 * `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows
 * the convention above. The function takes `flattened` by rvalue reference and consumes its
 * columns to build the result, so the caller must not rely on `flattened` afterwards.
 *
 * If none of the top-level columns of `blueprint` is a STRUCT, unflattening is a no-op and
 * `flattened` itself is handed back unchanged.
 *
 * Note: This function requires a null-mask vector for each STRUCT column, including for nested
 * STRUCT members.
 *
 * Note: LIST columns are not supported. The implementation rejects the call (via
 * `CUDF_EXPECTS`) when `is_or_has_nested_lists` reports true for any top-level column of
 * `blueprint`.
 *
 * @param flattened "Flattened" `table` of input columns, following the conventions in
 * `flatten_nested_columns()`.
 * @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines
 * the nesting of the reconstructed output table.
 * @return std::unique_ptr<cudf::table> Unflattened table (with nested STRUCT columns) reconstructed
 * based on `blueprint`.
 */
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
table_view const& blueprint);

/**
* @brief Push down nulls from a parent mask into a child column, using bitwise AND.
*
Expand Down
92 changes: 0 additions & 92 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,98 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input,
return table_flattener{input, column_order, null_precedence, nullability}();
}

namespace {
using vector_of_columns = std::vector<std::unique_ptr<cudf::column>>;
using column_index_t = typename vector_of_columns::size_type;

// Forward declaration, to enable recursion via `unflattener`.
std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint);

/**
* @brief Helper functor to reconstruct STRUCT columns from its flattened member columns.
*
*/
class unflattener {
public:
unflattener(vector_of_columns& flattened_, column_index_t& current_index_)
: flattened{flattened_}, current_index{current_index_}
{
}

auto operator()(column_view const& blueprint)
{
return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint)
: std::move(flattened[current_index++]);
}

private:
vector_of_columns& flattened;
column_index_t& current_index;

}; // class unflattener;

std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
column_index_t& current_index,
cudf::column_view const& blueprint)
{
// "Consume" columns from `flattened`, starting at `current_index`,
// based on the provided `blueprint` struct col. Recurse for struct children.
CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT,
"Expected blueprint column to be a STRUCT column.");

CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children.");

auto const num_rows = flattened[current_index]->size();

// cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
// before the child/member columns.
// E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
// 1. Null Vector for STRUCT_1
// 2. Null Vector for STRUCT_2
// 3. Member STRUCT_2::A
// 4. Member STRUCT_2::B
// 5. Member STRUCT_1::C
//
// Extract null-vector *before* child columns are constructed.
auto struct_null_column_contents = flattened[current_index++]->release();
auto unflattening_iter =
thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index});

return cudf::make_structs_column(
num_rows,
vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()},
UNKNOWN_NULL_COUNT, // Do count?
std::move(*struct_null_column_contents.null_mask));
}
} // namespace

/**
 * @brief Reconstruct nested STRUCT columns from a flattened table, guided by `blueprint`.
 *
 * The call is rejected if `is_or_has_nested_lists` holds for any top-level blueprint column.
 * If no top-level blueprint column is a STRUCT, `flattened` is returned unchanged. Otherwise
 * the columns of `flattened` are released and consumed, in blueprint order, to build the
 * output table.
 *
 * @param flattened Flattened input table; consumed by this call.
 * @param blueprint Table view whose structure defines the nesting of the output.
 * @return Table with one column per top-level column of `blueprint`.
 */
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
                                                      table_view const& blueprint)
{
  // Bail, if LISTs are present.
  auto const has_lists = std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists);
  CUDF_EXPECTS(not has_lists, "Unflattening LIST columns is not supported.");

  // If there are no STRUCTs, unflattening is a NOOP.
  auto const has_structs = std::any_of(blueprint.begin(), blueprint.end(), is_struct);
  if (not has_structs) {
    // `flattened` is a named rvalue reference, so the explicit move is required here.
    return std::move(flattened);  // Unchanged.
  }

  // From here on the table is dereferenced; fail cleanly instead of dereferencing null.
  CUDF_EXPECTS(flattened != nullptr, "Flattened input table must not be null.");

  // There be struct columns.
  // Note: Requires null vectors for all struct input columns.
  auto flattened_columns = flattened->release();
  auto current_idx       = column_index_t{0};

  // Rebuild the output columns strictly in blueprint order: every call advances
  // `current_idx`, so the sequence of calls determines which flattened columns are used.
  auto rebuild_column = unflattener{flattened_columns, current_idx};
  vector_of_columns unflattened_columns;
  unflattened_columns.reserve(static_cast<column_index_t>(blueprint.num_columns()));
  for (auto col = blueprint.begin(); col != blueprint.end(); ++col) {
    unflattened_columns.push_back(rebuild_column(*col));
  }

  return std::make_unique<cudf::table>(std::move(unflattened_columns));
}

// Helper function to superimpose validity of parent struct
// over the specified member (child) column.
void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
Expand Down
Loading