Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STRUCT column support for cudf::merge. #8422

Merged
merged 7 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/include/cudf/merge.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION.
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,8 +65,8 @@ namespace cudf {
* (0,'b', GREEN), (1,'a', NULL), (1,'c', RED)
*
* (third column, the "color", just "goes along for the ride";
* meaning is permutted according to the data movements dictated
* by lexicographic ordering of columns 0 and 1);
* meaning it is permuted according to the data movements dictated
* by lexicographic ordering of columns 0 and 1)
*
* with result columns:
*
Expand Down
93 changes: 69 additions & 24 deletions cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/dictionary/detail/merge.hpp>
#include <cudf/dictionary/detail/update_keys.hpp>
#include <cudf/strings/detail/merge.cuh>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>

Expand All @@ -49,29 +50,28 @@ using index_type = detail::index_type;
/**
* @brief Merges the bits of two validity bitmasks.
*
* Merges the bits from two column_device_views into the destination column_device_view
* according to `merged_indices` map such that bit `i` in `out_col`
* Merges the bits from two column_device_views into the destination validity buffer
* according to `merged_indices` map such that bit `i` in `out_validity`
* will be equal to bit `thrust::get<1>(merged_indices[i])` from `left_dcol`
* if `thrust::get<0>(merged_indices[i])` equals `side::LEFT`; otherwise,
* from `right_dcol`.
*
* `left_dcol`, `right_dcol` and `out_dcol` must not
* overlap.
* `left_dcol` and `right_dcol` must not overlap.
*
* @tparam left_have_valids Indicates whether left_dcol mask is unallocated (hence, ALL_VALID)
* @tparam right_have_valids Indicates whether right_dcol mask is unallocated (hence ALL_VALID)
* @param[in] left_dcol The left column_device_view whose bits will be merged
* @param[in] right_dcol The right column_device_view whose bits will be merged
* @param[out] out_dcol The output mutable_column_device_view after merging the left and right
* @param[in] num_destination_rows The number of rows in the out_dcol
* @param[out] out_validity The output validity buffer after merging the left and right buffers
* @param[in] num_destination_rows The number of rows in the out_validity buffer
* @param[in] merged_indices The map that indicates the source of the input and index
* to be copied to the output. Length must be equal to `num_destination_rows`
*/
template <bool left_have_valids, bool right_have_valids>
__global__ void materialize_merged_bitmask_kernel(
column_device_view left_dcol,
column_device_view right_dcol,
mutable_column_device_view out_dcol,
bitmask_type* out_validity,
size_type const num_destination_rows,
index_type const* const __restrict__ merged_indices)
{
Expand All @@ -95,10 +95,8 @@ __global__ void materialize_merged_bitmask_kernel(
// bitmask element
bitmask_type const result_mask{__ballot_sync(active_threads, source_bit_is_valid)};

size_type const output_element = word_index(destination_row);

// Only one thread writes output
if (0 == threadIdx.x % warpSize) { out_dcol.set_mask_word(output_element, result_mask); }
if (0 == threadIdx.x % warpSize) { out_validity[word_index(destination_row)] = result_mask; }

destination_row += blockDim.x * gridDim.x;
active_threads = __ballot_sync(active_threads, destination_row < num_destination_rows);
Expand All @@ -107,36 +105,35 @@ __global__ void materialize_merged_bitmask_kernel(

void materialize_bitmask(column_view const& left_col,
column_view const& right_col,
mutable_column_view& out_col,
bitmask_type* out_validity,
size_type out_validity_size,
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
index_type const* merged_indices,
rmm::cuda_stream_view stream)
{
constexpr size_type BLOCK_SIZE{256};
detail::grid_1d grid_config{out_col.size(), BLOCK_SIZE};
detail::grid_1d grid_config{out_validity_size, BLOCK_SIZE};

auto p_left_dcol = column_device_view::create(left_col);
auto p_right_dcol = column_device_view::create(right_col);
auto p_out_dcol = mutable_column_device_view::create(out_col);

auto left_valid = *p_left_dcol;
auto right_valid = *p_right_dcol;
auto out_valid = *p_out_dcol;

if (left_col.has_nulls()) {
if (right_col.has_nulls()) {
materialize_merged_bitmask_kernel<true, true>
<<<grid_config.num_blocks, grid_config.num_threads_per_block, 0, stream.value()>>>(
left_valid, right_valid, out_valid, out_col.size(), merged_indices);
left_valid, right_valid, out_validity, out_validity_size, merged_indices);
} else {
materialize_merged_bitmask_kernel<true, false>
<<<grid_config.num_blocks, grid_config.num_threads_per_block, 0, stream.value()>>>(
left_valid, right_valid, out_valid, out_col.size(), merged_indices);
left_valid, right_valid, out_validity, out_validity_size, merged_indices);
}
} else {
if (right_col.has_nulls()) {
materialize_merged_bitmask_kernel<false, true>
<<<grid_config.num_blocks, grid_config.num_threads_per_block, 0, stream.value()>>>(
left_valid, right_valid, out_valid, out_col.size(), merged_indices);
left_valid, right_valid, out_validity, out_validity_size, merged_indices);
} else {
CUDF_FAIL("materialize_merged_bitmask_kernel<false, false>() should never be called.");
}
Expand Down Expand Up @@ -220,8 +217,6 @@ index_vector generate_merged_indices(table_view const& left_table,
return merged_indices;
}

} // namespace

/**
* @brief Generate merged column given row-order of merged tables
* (ordered according to indices of key_cols) and the 2 columns to merge.
Expand Down Expand Up @@ -301,7 +296,8 @@ struct column_merger {
if (lcol.has_nulls() || rcol.has_nulls()) {
// resolve null mask:
//
materialize_bitmask(lcol, rcol, merged_view, row_order_.data(), stream);
materialize_bitmask(
lcol, rcol, merged_view.null_mask(), merged_view.size(), row_order_.data(), stream);
}

return merged_col;
Expand All @@ -327,7 +323,8 @@ std::unique_ptr<column> column_merger::operator()<cudf::string_view>(
mr);
if (lcol.has_nulls() || rcol.has_nulls()) {
auto merged_view = column->mutable_view();
materialize_bitmask(lcol, rcol, merged_view, row_order_.data(), stream);
materialize_bitmask(
lcol, rcol, merged_view.null_mask(), merged_view.size(), row_order_.data(), stream);
}
return column;
}
Expand All @@ -342,17 +339,65 @@ std::unique_ptr<column> column_merger::operator()<cudf::dictionary32>(
{
auto result = cudf::dictionary::detail::merge(
cudf::dictionary_column_view(lcol), cudf::dictionary_column_view(rcol), row_order_, stream, mr);

// set the validity mask
if (lcol.has_nulls() || rcol.has_nulls()) {
auto merged_view = result->mutable_view();
materialize_bitmask(lcol, rcol, merged_view, row_order_.data(), stream);
materialize_bitmask(
lcol, rcol, merged_view.null_mask(), merged_view.size(), row_order_.data(), stream);
}
return result;
}

// specialization for structs
template <>
std::unique_ptr<column> column_merger::operator()<cudf::struct_view>(
column_view const& lcol,
column_view const& rcol,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
// merge each child.
std::vector<std::unique_ptr<column>> merged_children;
merged_children.reserve(lcol.num_children());

structs_column_view lhs(lcol);
structs_column_view rhs(rcol);

column_merger merger{row_order_};
auto iter = thrust::make_counting_iterator(0);
std::transform(
iter, iter + lhs.num_children(), std::back_inserter(merged_children), [&](size_type i) {
return cudf::type_dispatcher<dispatch_storage_type>(
lhs.child(i).type(), merger, lhs.get_sliced_child(i), rhs.get_sliced_child(i), stream, mr);
});

auto const merged_size = lcol.size() + rcol.size();
codereport marked this conversation as resolved.
Show resolved Hide resolved

// materialize the output buffer
rmm::device_buffer validity =
lcol.has_nulls() || rcol.has_nulls()
? create_null_mask(merged_size, mask_state::UNINITIALIZED, stream, mr)
: rmm::device_buffer{};
if (lcol.has_nulls() || rcol.has_nulls()) {
materialize_bitmask(lcol,
rcol,
static_cast<bitmask_type*>(validity.data()),
merged_size,
row_order_.data(),
stream);
}
Comment on lines +378 to +389
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should use is_nullable here and let the caller be responsible for introspecting the data. If either of the inputs are nullable, the output should be nullable, regardless of whether there any invalid elements.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's definitely not consistent with how cudf works though. We try and drop validity whenever possible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a mixed bag whether we do or not, but I think there's a strong argument for letting the user decide. Everywhere else we strongly prefer to avoid data introspection, and null-counting is data introspection. Switching to is_nullable means we are no longer introspecting the validity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jrhemstad Pinging Jake.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Approving because there's no point in blocking on this.

Copy link
Contributor

@cwharris cwharris Jun 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just want to start a general discussion.

Copy link
Contributor Author

@nvdbaranec nvdbaranec Jun 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the meta here is that we should be expecting null count to be already computed more often than not, so at least on the performance side it's theoretically free.


return make_structs_column(merged_size,
std::move(merged_children),
lcol.null_count() + rcol.null_count(),
std::move(validity),
stream,
mr);
}

using table_ptr_type = std::unique_ptr<cudf::table>;

namespace {
table_ptr_type merge(cudf::table_view const& left_table,
cudf::table_view const& right_table,
std::vector<cudf::size_type> const& key_cols,
Expand Down Expand Up @@ -415,7 +460,7 @@ T top_and_pop(std::priority_queue<T>& q)
return moved;
}

} // namespace
} // anonymous namespace

table_ptr_type merge(std::vector<table_view> const& tables_to_merge,
std::vector<cudf::size_type> const& key_cols,
Expand Down
146 changes: 146 additions & 0 deletions cpp/tests/merge/merge_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,152 @@ TEST_F(MergeTest, KeysWithNulls)
}
}

TEST_F(MergeTest, Structs)
{
// clang-format off

cudf::test::fixed_width_column_wrapper<int> t0_col0{0, 2, 4, 6, 8};
cudf::test::strings_column_wrapper t0_scol0{"abc", "def", "ghi", "jkl", "mno"};
cudf::test::fixed_width_column_wrapper<float> t0_scol1{1, 2, 3, 4, 5};
cudf::test::structs_column_wrapper t0_col1({t0_scol0, t0_scol1});

cudf::test::fixed_width_column_wrapper<int> t1_col0{1, 3, 5, 7, 9};
cudf::test::strings_column_wrapper t1_scol0{"pqr", "stu", "vwx", "yzz", "000"};
cudf::test::fixed_width_column_wrapper<float> t1_scol1{-1, -2, -3, -4, -5};
cudf::test::structs_column_wrapper t1_col1({t1_scol0, t1_scol1});

cudf::table_view t0({t0_col0, t0_col1});
cudf::table_view t1({t1_col0, t1_col1});

auto result = cudf::merge({t0, t1}, {0}, {cudf::order::ASCENDING});

cudf::test::fixed_width_column_wrapper<int> e_col0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
cudf::test::strings_column_wrapper e_scol0{"abc", "pqr", "def", "stu", "ghi", "vwx", "jkl", "yzz", "mno", "000"};
cudf::test::fixed_width_column_wrapper<float> e_scol1{1, -1, 2, -2, 3, -3, 4, -4, 5, -5};
cudf::test::structs_column_wrapper e_col1({e_scol0, e_scol1});

cudf::table_view expected({e_col0, e_col1});

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *result);

// clang-format on
}

TEST_F(MergeTest, StructsWithNulls)
{
// clang-format off

cudf::test::fixed_width_column_wrapper<int> t0_col0{0, 2, 4, 6, 8};
cudf::test::strings_column_wrapper t0_scol0{{"abc", "def", "ghi", "jkl", "mno"}, {1, 1, 0, 0, 1}};
cudf::test::fixed_width_column_wrapper<float> t0_scol1{{1, 2, 3, 4, 5}, {0, 1, 0, 0, 1}};
cudf::test::structs_column_wrapper t0_col1({t0_scol0, t0_scol1}, {1, 0, 1, 0, 0});

cudf::test::fixed_width_column_wrapper<int> t1_col0{1, 3, 5, 7, 9};
cudf::test::strings_column_wrapper t1_scol0{"pqr", "stu", "vwx", "yzz", "000"};
cudf::test::fixed_width_column_wrapper<float> t1_scol1{{-1, -2, -3, -4, -5}, {1, 1, 1, 0, 0}};
cudf::test::structs_column_wrapper t1_col1({t1_scol0, t1_scol1}, {1, 1, 1, 1, 0});

cudf::table_view t0({t0_col0, t0_col1});
cudf::table_view t1({t1_col0, t1_col1});

auto result = cudf::merge({t0, t1}, {0}, {cudf::order::ASCENDING});

cudf::test::fixed_width_column_wrapper<int> e_col0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
cudf::test::strings_column_wrapper e_scol0{{"abc", "pqr", "def", "stu", "ghi", "vwx", "jkl", "yzz", "mno", "000"},
{1, 1, 0, 1, 0, 1, 0, 1, 0, 1}};
cudf::test::fixed_width_column_wrapper<float> e_scol1{{1, -1, 2, -2, 3, -3, 4, -4, 5, -5},
{0, 1, 0, 1, 0, 1, 0, 0, 0, 0}};
cudf::test::structs_column_wrapper e_col1({e_scol0, e_scol1}, {1, 1, 0, 1, 1, 1, 0, 1, 0, 0});

cudf::table_view expected({e_col0, e_col1});

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *result);

// clang-format on
}

TEST_F(MergeTest, StructsNested)
{
// clang-format off

cudf::test::fixed_width_column_wrapper<int> t0_col0{8, 6, 4, 2, 0};
cudf::test::strings_column_wrapper t0_scol0{"mno", "jkl", "ghi", "def", "abc"};
cudf::test::fixed_width_column_wrapper<float> t0_scol1{5, 4, 3, 2, 1};
cudf::test::strings_column_wrapper t0_sscol0{"5555", "4444", "333", "22", "1"};
cudf::test::fixed_width_column_wrapper<float> t0_sscol1{50, 40, 30, 20, 10};
cudf::test::structs_column_wrapper t0_scol2({t0_sscol0, t0_sscol1});
cudf::test::structs_column_wrapper t0_col1({t0_scol0, t0_scol1, t0_scol2});

cudf::test::fixed_width_column_wrapper<int> t1_col0{9, 7, 5, 3, 1};
cudf::test::strings_column_wrapper t1_scol0{"000", "yzz", "vwx", "stu", "pqr"};
cudf::test::fixed_width_column_wrapper<float> t1_scol1{-5, -4, -3, -2, -1};
cudf::test::strings_column_wrapper t1_sscol0{"-5555", "-4444", "-333", "-22", "-1"};
cudf::test::fixed_width_column_wrapper<float> t1_sscol1{-50, -40, -30, -20, -10};
cudf::test::structs_column_wrapper t1_scol2({t1_sscol0, t1_sscol1});
cudf::test::structs_column_wrapper t1_col1({t1_scol0, t1_scol1, t1_scol2});

cudf::table_view t0({t0_col0 , t0_col1});
cudf::table_view t1({t1_col0 , t1_col1});

auto result = cudf::merge({t0, t1}, {0}, {cudf::order::DESCENDING});

cudf::test::fixed_width_column_wrapper<int> e_col0{9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
cudf::test::strings_column_wrapper e_scol0{"000", "mno", "yzz", "jkl", "vwx", "ghi", "stu", "def", "pqr", "abc"};
cudf::test::fixed_width_column_wrapper<float> e_scol1{-5, 5, -4, 4, -3, 3, -2, 2, -1, 1};
cudf::test::strings_column_wrapper e_sscol0{"-5555", "5555", "-4444", "4444", "-333", "333", "-22", "22", "-1", "1"};
cudf::test::fixed_width_column_wrapper<float> e_sscol1{-50, 50, -40, 40, -30, 30, -20, 20, -10, 10};
cudf::test::structs_column_wrapper e_scol2({e_sscol0, e_sscol1});
cudf::test::structs_column_wrapper e_col1({e_scol0, e_scol1, e_scol2});

cudf::table_view expected({e_col0, e_col1});

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *result);

// clang-format on
}

TEST_F(MergeTest, StructsNestedWithNulls)
{
// clang-format off

cudf::test::fixed_width_column_wrapper<int> t0_col0{8, 6, 4, 2, 0};
cudf::test::strings_column_wrapper t0_scol0{"mno", "jkl", "ghi", "def", "abc"};
cudf::test::fixed_width_column_wrapper<float> t0_scol1{{5, 4, 3, 2, 1}, {1, 1, 0, 1, 1}};
cudf::test::strings_column_wrapper t0_sscol0{{"5555", "4444", "333", "22", "1"}, {1, 0, 1, 1, 0}};
cudf::test::fixed_width_column_wrapper<float> t0_sscol1{50, 40, 30, 20, 10};
cudf::test::structs_column_wrapper t0_scol2({t0_sscol0, t0_sscol1}, {0, 0, 1, 1, 1});
cudf::test::structs_column_wrapper t0_col1({t0_scol0, t0_scol1, t0_scol2}, {0, 0, 1, 1, 1});

cudf::test::fixed_width_column_wrapper<int> t1_col0{9, 7, 5, 3, 1};
cudf::test::strings_column_wrapper t1_scol0{"000", "yzz", "vwx", "stu", "pqr"};
cudf::test::fixed_width_column_wrapper<float> t1_scol1{{-5, -4, -3, -2, -1}, {1, 1, 1, 0, 1}};
cudf::test::strings_column_wrapper t1_sscol0{{"-5555", "-4444", "-333", "-22", "-1"}, {1, 1, 1, 1, 1}};
cudf::test::fixed_width_column_wrapper<float> t1_sscol1{-50, -40, -30, -20, -10};
cudf::test::structs_column_wrapper t1_scol2({t1_sscol0, t1_sscol1}, {1, 1, 1, 1, 0});
cudf::test::structs_column_wrapper t1_col1({t1_scol0, t1_scol1, t1_scol2});

cudf::table_view t0({t0_col0 , t0_col1});
cudf::table_view t1({t1_col0 , t1_col1});

auto result = cudf::merge({t0, t1}, {0}, {cudf::order::DESCENDING});

cudf::test::fixed_width_column_wrapper<int> e_col0{9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
cudf::test::strings_column_wrapper e_scol0{"000", "mno", "yzz", "jkl", "vwx", "ghi", "stu", "def", "pqr", "abc"};
cudf::test::fixed_width_column_wrapper<float> e_scol1{{-5, 5, -4, 4, -3, 3, -2, 2, -1, 1},
{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1}};
cudf::test::strings_column_wrapper e_sscol0{{"-5555", "5555", "-4444", "4444", "-333", "333", "-22", "22", "-1", "1"},
{ 1, 0, 1, 0, 1, 1, 1, 1, 0, 0}};
cudf::test::fixed_width_column_wrapper<float> e_sscol1{-50, 50, -40, 40, -30, 30, -20, 20, -10, 10};
cudf::test::structs_column_wrapper e_scol2({e_sscol0, e_sscol1}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1});
cudf::test::structs_column_wrapper e_col1({e_scol0, e_scol1, e_scol2}, {1, 0, 1, 0, 1, 1, 1, 1, 1, 1});

cudf::table_view expected({e_col0, e_col1});

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *result);

// clang-format on
}

template <typename T>
struct FixedPointTestBothReps : public cudf::test::BaseFixture {
};
Expand Down