Skip to content

Commit

Permalink
Change return type of cudf::structs::detail::flatten_nested_columns
Browse files Browse the repository at this point in the history
… to smart pointer (#12878)

This changes the return type of `cudf::structs::detail::flatten_nested_columns` from `flattened_table` to `std::unique_ptr<flattened_table>`. By returning a smart pointer, we can forward-declare the output type and reduce dependencies on the header `cudf/detail/structs/utilities.hpp`.

This also does some cleanup to remove the unused header `cudf/detail/structs/utilities.hpp` from many places.

Authors:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #12878
  • Loading branch information
ttnghia authored Mar 9, 2023
1 parent 9ec5477 commit 3048791
Show file tree
Hide file tree
Showing 14 changed files with 70 additions and 76 deletions.
3 changes: 1 addition & 2 deletions cpp/include/cudf/detail/groupby/sort_helper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,7 +18,6 @@

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

Expand Down
9 changes: 6 additions & 3 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,6 @@
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
Expand All @@ -37,6 +36,10 @@
template <typename T>
class default_allocator;

// Forward declaration of `flattened_table`, so this header can name
// `std::unique_ptr<cudf::structs::detail::flattened_table>` without including
// `cudf/detail/structs/utilities.hpp`.
// NOTE(review): code that destroys or dereferences such a pointer still needs the complete type,
// i.e. must include `cudf/detail/structs/utilities.hpp` itself — confirm for all users of this
// header.
namespace cudf::structs::detail {
class flattened_table;
}

namespace cudf {
namespace detail {

Expand Down Expand Up @@ -74,7 +77,7 @@ struct hash_join {
rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
cudf::structs::detail::flattened_table
std::unique_ptr<cudf::structs::detail::flattened_table>
_flattened_build_table; ///< flattened data structures for `_build`
map_type _hash_table; ///< hash table built on `_build`

Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ class flattened_table {
* @param input input table to be flattened
* @param column_order column order for input table
* @param null_precedence null order for input table
* @param nullability force output to have nullability columns even if input columns
* are all valid
* @param nullability force output to have nullability columns even if input columns are all valid
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate new device memory
* @return `flatten_result` with flattened table, flattened column order, flattened null precedence,
* alongside the supporting columns and device_buffers for the flattened table.
* @return A `std::unique_ptr` to a `flattened_table` containing the flattened columns, flattened
* column orders, and flattened null precedence, alongside the supporting columns and
* device_buffers for the flattened table.
*/
[[nodiscard]] flattened_table flatten_nested_columns(
[[nodiscard]] std::unique_ptr<flattened_table> flatten_nested_columns(
table_view const& input,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,6 @@
#include <cudf/detail/groupby/group_replace_nulls.hpp>
#include <cudf/detail/groupby/sort_helper.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/groupby.hpp>
Expand Down
1 change: 0 additions & 1 deletion cpp/src/groupby/sort/group_rank_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/device_operators.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/utilities/span.hpp>
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/groupby/sort/sort_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <cudf/detail/labeling/label_segments.cuh>
#include <cudf/detail/scatter.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/strings/string_view.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -61,8 +60,6 @@ sort_groupby_helper::sort_groupby_helper(table_view const& keys,
_include_null_keys(include_null_keys),
_null_precedence(null_precedence)
{
using namespace cudf::structs::detail;

// Cannot depend on caller's sorting if the column contains nulls,
// and null values are to be excluded.
// Re-sort the data, to filter out nulls more easily.
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
// need to store off the owning structures for some of the views in _build
_flattened_build_table = structs::detail::flatten_nested_columns(
build, {}, {}, structs::detail::column_nullability::FORCE, stream);
_build = _flattened_build_table;
_build = _flattened_build_table->flattened_columns();

if (_is_empty) { return; }

Expand Down Expand Up @@ -358,7 +358,7 @@ std::size_t hash_join<Hasher>::inner_join_size(cudf::table_view const& probe,

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE, stream);
auto const flattened_probe_table = flattened_probe.flattened_columns();
auto const flattened_probe_table = flattened_probe->flattened_columns();

auto build_table_ptr = cudf::table_device_view::create(_build, stream);
auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream);
Expand All @@ -383,7 +383,7 @@ std::size_t hash_join<Hasher>::left_join_size(cudf::table_view const& probe,

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE, stream);
auto const flattened_probe_table = flattened_probe.flattened_columns();
auto const flattened_probe_table = flattened_probe->flattened_columns();

auto build_table_ptr = cudf::table_device_view::create(_build, stream);
auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream);
Expand All @@ -409,7 +409,7 @@ std::size_t hash_join<Hasher>::full_join_size(cudf::table_view const& probe,

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE, stream);
auto const flattened_probe_table = flattened_probe.flattened_columns();
auto const flattened_probe_table = flattened_probe->flattened_columns();

auto build_table_ptr = cudf::table_device_view::create(_build, stream);
auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream);
Expand Down Expand Up @@ -476,7 +476,7 @@ hash_join<Hasher>::compute_hash_join(cudf::table_view const& probe,

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE, stream);
auto const flattened_probe_table = flattened_probe.flattened_columns();
auto const flattened_probe_table = flattened_probe->flattened_columns();

CUDF_EXPECTS(_build.num_columns() == flattened_probe_table.num_columns(),
"Mismatch in number of columns to be joined on");
Expand Down
1 change: 0 additions & 1 deletion cpp/src/reductions/simple.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

#include <cudf/detail/copy.hpp>
#include <cudf/detail/reduction.cuh>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/reductions/struct_minmax_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ auto static constexpr DEFAULT_NULL_ORDER = cudf::null_order::BEFORE;
*/
class comparison_binop_generator {
private:
cudf::structs::detail::flattened_table const flattened_input;
std::unique_ptr<cudf::structs::detail::flattened_table> const flattened_input;
std::unique_ptr<table_device_view, std::function<void(table_device_view*)>> const
d_flattened_input_ptr;
bool const is_min_op;
Expand All @@ -103,13 +103,14 @@ class comparison_binop_generator {
std::vector<null_order>{DEFAULT_NULL_ORDER},
cudf::structs::detail::column_nullability::MATCH_INCOMING,
stream)},
d_flattened_input_ptr{table_device_view::create(flattened_input, stream)},
d_flattened_input_ptr{
table_device_view::create(flattened_input->flattened_columns(), stream)},
is_min_op(is_min_op),
has_nulls{has_nested_nulls(table_view{{input}})},
null_orders_dvec(0, stream)
{
if (is_min_op) {
null_orders = flattened_input.null_orders();
null_orders = flattened_input->null_orders();
// If the input column has nulls (at the top level), null structs are excluded from the
// operations, and that is equivalent to considering top-level nulls as larger than all other
// non-null STRUCT elements (if finding for ARGMIN), or smaller than all other non-null STRUCT
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/search/contains_table.cu
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,8 @@ rmm::device_uvector<bool> contains_without_lists_or_nans(table_view const& hayst
structs::detail::flatten_nested_columns(haystack, {}, {}, flatten_nullability, stream);
auto const needles_flattened_tables =
structs::detail::flatten_nested_columns(needles, {}, {}, flatten_nullability, stream);
auto const haystack_flattened = haystack_flattened_tables.flattened_columns();
auto const needles_flattened = needles_flattened_tables.flattened_columns();
auto const haystack_flattened = haystack_flattened_tables->flattened_columns();
auto const needles_flattened = needles_flattened_tables->flattened_columns();
auto const haystack_tdv_ptr = table_device_view::create(haystack_flattened, stream);
auto const needles_tdv_ptr = table_device_view::create(needles_flattened, stream);

Expand Down
1 change: 0 additions & 1 deletion cpp/src/sort/is_sorted.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/sort/sort_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,7 +18,6 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
Expand Down
31 changes: 19 additions & 12 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,23 +193,30 @@ struct table_flattener {
}
}

return flattened_table{table_view{flat_columns},
std::move(flat_column_order),
std::move(flat_null_precedence),
std::move(validity_as_column),
std::move(nullable_data)};
return std::make_unique<flattened_table>(table_view{flat_columns},
std::move(flat_column_order),
std::move(flat_null_precedence),
std::move(validity_as_column),
std::move(nullable_data));
}
};

flattened_table flatten_nested_columns(table_view const& input,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
column_nullability nullability,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<flattened_table> flatten_nested_columns(
table_view const& input,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
column_nullability nullability,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const has_struct = std::any_of(input.begin(), input.end(), is_struct);
if (not has_struct) { return flattened_table{input, column_order, null_precedence, {}, {}}; }
if (not has_struct) {
return std::make_unique<flattened_table>(input,
column_order,
null_precedence,
std::vector<std::unique_ptr<column>>{},
temporary_nullable_data{});
}

return table_flattener{input, column_order, null_precedence, nullability, stream, mr}();
}
Expand Down
60 changes: 26 additions & 34 deletions cpp/tests/structs/utilities_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,11 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel)
auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}};
auto nums_col = nums{{0, 1, 2}, cudf::test::iterators::null_at(6)};

auto table = cudf::table_view{{lists_col, nums_col}};
auto table = cudf::table_view{{lists_col, nums_col}};
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());

CUDF_TEST_EXPECT_TABLES_EQUAL(
table,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
Expand Down Expand Up @@ -91,12 +90,11 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs)
{"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};

auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}};
auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}};
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());

CUDF_TEST_EXPECT_TABLES_EQUAL(
table,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
Expand All @@ -120,10 +118,9 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
Expand All @@ -149,10 +146,9 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
auto expected = cudf::table_view{
{expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
Expand Down Expand Up @@ -189,10 +185,9 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
Expand Down Expand Up @@ -230,10 +225,9 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
Expand Down Expand Up @@ -272,10 +266,9 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
Expand Down Expand Up @@ -314,10 +307,9 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
expected_nums_col_3,
expected_strings_col}};

CUDF_TEST_EXPECT_TABLES_EQUAL(
expected,
cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()));
auto flattened_table = cudf::structs::detail::flatten_nested_columns(
table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns());
}

TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)
Expand Down

0 comments on commit 3048791

Please sign in to comment.