Skip to content

Commit

Permalink
Fix struct binary search and struct flattening (#8268)
Browse files Browse the repository at this point in the history
This PR fixes several bugs. In particular:
 * Fixes a bug in struct binary search that only check for null elements at the top level.
 * Fixes a bug in struct flattening that uses default null order for the children of the input structs columns.

Unit tests for struct binary search and struct sorting are also rewritten.
Closes #8189.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Mark Harris (https://github.com/harrism)

URL: #8268
  • Loading branch information
ttnghia authored May 24, 2021
1 parent 6dbf2d5 commit dd5eecd
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 44 deletions.
14 changes: 12 additions & 2 deletions cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,19 @@ class mutable_table_view : public detail::table_view_base<mutable_column_view> {
mutable_table_view(std::vector<mutable_table_view> const& views);
};

inline bool has_nulls(table_view view)
inline bool has_nulls(table_view const& view)
{
return std::any_of(view.begin(), view.end(), [](column_view col) { return col.has_nulls(); });
return std::any_of(view.begin(), view.end(), [](auto const& col) { return col.has_nulls(); });
}

inline bool has_nested_nulls(table_view const& input)
{
return std::any_of(input.begin(), input.end(), [](auto const& col) {
return col.has_nulls() ||
std::any_of(col.child_begin(), col.child_end(), [](auto const& child_col) {
return has_nested_nulls(table_view{{child_col}});
});
});
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/search.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ std::unique_ptr<column> search_ordered(table_view const& t,
auto const matched = dictionary::detail::match_dictionaries({t, values}, stream);

// Prepare to flatten the structs column
auto const has_null_elements = has_nulls(t) or has_nulls(values);
auto const has_null_elements = has_nested_nulls(t) or has_nested_nulls(values);
auto const flatten_nullability = has_null_elements
? structs::detail::column_nullability::FORCE
: structs::detail::column_nullability::MATCH_INCOMING;
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,11 @@ struct flattened_table {
for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) {
auto const& child = col.get_sliced_child(i);
if (child.type().id() == type_id::STRUCT) {
flatten_struct_column(structs_column_view{child}, col_order, null_order::BEFORE);
// default spark behaviour is null_order::BEFORE
flatten_struct_column(structs_column_view{child}, col_order, col_null_order);
} else {
flat_columns.push_back(child);
if (not column_order.empty()) flat_column_order.push_back(col_order);
if (not null_precedence.empty()) flat_null_precedence.push_back(null_order::BEFORE);
// default spark behaviour is null_order::BEFORE
if (not null_precedence.empty()) flat_null_precedence.push_back(col_null_order);
}
}
}
Expand Down
20 changes: 10 additions & 10 deletions cpp/tests/search/search_struct_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests)
structs_t = structs_col{{child_col_t}, null_at(10)}.release();
results =
search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER});
expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10};
expected_upper_bound = int32s_col{2, 0, 10, 11, 6, 10};
expected_lower_bound = int32s_col{1, 9, 9, 10, 2, 9};
expected_upper_bound = int32s_col{2, 10, 9, 11, 6, 9};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all);

Expand All @@ -187,8 +187,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests)
structs_t = structs_col{{child_col_t}, null_at(10)}.release();
results =
search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::AFTER});
expected_lower_bound = int32s_col{7, 11, 0, 0, 3, 0};
expected_upper_bound = int32s_col{8, 11, 0, 0, 7, 0};
expected_lower_bound = int32s_col{7, 0, 0, 0, 3, 0};
expected_upper_bound = int32s_col{8, 0, 0, 0, 7, 0};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all);
}
Expand All @@ -214,8 +214,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithValuesHavingNullsTests)
// Sorted asc, search nulls last
results =
search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER});
expected_lower_bound = int32s_col{3, 0, 11, 11, 4, 11};
expected_upper_bound = int32s_col{4, 0, 11, 11, 8, 11};
expected_lower_bound = int32s_col{3, 11, 11, 11, 4, 11};
expected_upper_bound = int32s_col{4, 11, 11, 11, 8, 11};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all);

Expand All @@ -232,8 +232,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithValuesHavingNullsTests)
// Sorted dsc, search nulls last
results =
search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::AFTER});
expected_lower_bound = int32s_col{7, 11, 0, 0, 3, 0};
expected_upper_bound = int32s_col{8, 11, 0, 0, 7, 0};
expected_lower_bound = int32s_col{7, 0, 0, 0, 3, 0};
expected_upper_bound = int32s_col{8, 0, 0, 0, 7, 0};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all);
}
Expand Down Expand Up @@ -261,8 +261,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithTargetHavingNullsTests)
structs_t = structs_col{{child_col_t}, null_at(10)}.release();
results =
search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER});
expected_lower_bound = int32s_col{1, 0, 10, 0, 2, 10};
expected_upper_bound = int32s_col{2, 1, 10, 1, 6, 10};
expected_lower_bound = int32s_col{1, 0, 9, 0, 2, 9};
expected_upper_bound = int32s_col{2, 1, 9, 1, 6, 9};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all);

Expand Down
61 changes: 34 additions & 27 deletions cpp/tests/sort/sort_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,16 @@ void run_sort_test(table_view input,
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
}

using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
cudf::test::FloatingPointTypes,
cudf::test::DurationTypes,
cudf::test::TimestampTypes>;

template <typename T>
struct Sort : public BaseFixture {
};

TYPED_TEST_CASE(Sort, NumericTypes);
TYPED_TEST_CASE(Sort, TestTypes);

TYPED_TEST(Sort, WithNullMax)
{
Expand Down Expand Up @@ -461,14 +466,14 @@ TYPED_TEST(Sort, WithStructColumnCombinations)
+------------+ +------------+ +------------+ +------------+
| s| | s| | s| | s|
+------------+ +------------+ +------------+ +------------+
2 | null| 1 | {1, null}| 2 | null| 3 |{null, null}|
4 | null| 0 | {0, null}| 4 | null| 5 |{null, null}|
1 | {1, null}| 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}|
0 | {0, null}| 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}|
6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| 0 | {0, null}|
7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| 1 | {1, null}|
3 |{null, null}| 2 | null| 0 | {0, null}| 2 | null|
5 |{null, null}| 4 | null| 1 | {1, null}| 4 | null|
2 | null| 1 | {1, null}| 2 | null| 0 | {0, null}|
4 | null| 0 | {0, null}| 4 | null| 1 | {1, null}|
3 |{null, null}| 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}|
5 |{null, null}| 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}|
6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| 3 |{null, null}|
7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| 5 |{null, null}|
1 | {1, null}| 2 | null| 0 | {0, null}| 2 | null|
0 | {0, null}| 4 | null| 1 | {1, null}| 4 | null|
+------------+ +------------+ +------------+ +------------+
*/
// clang-format on
Expand All @@ -477,7 +482,7 @@ TYPED_TEST(Sort, WithStructColumnCombinations)
std::vector<order> column_order1{order::DESCENDING};

// desc_nulls_first
fixed_width_column_wrapper<int32_t> expected1{{2, 4, 1, 0, 6, 7, 3, 5}};
fixed_width_column_wrapper<int32_t> expected1{{2, 4, 3, 5, 6, 7, 1, 0}};
auto got = sorted_order(input, column_order1, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
// Run test for sort and sort_by_key
Expand All @@ -499,7 +504,7 @@ TYPED_TEST(Sort, WithStructColumnCombinations)
run_sort_test(input, expected3, column_order2, {null_order::BEFORE});

// asce_nulls_last
fixed_width_column_wrapper<int32_t> expected4{{3, 5, 7, 6, 0, 1, 2, 4}};
fixed_width_column_wrapper<int32_t> expected4{{0, 1, 7, 6, 3, 5, 2, 4}};
got = sorted_order(input, column_order2, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
// Run test for sort and sort_by_key
Expand Down Expand Up @@ -534,14 +539,14 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
+------------+ +------------+
| s| | s|
+------------+ +------------+
2 | {9, 9}| 3 |{null, null}|
4 | {9, 9}| 5 |{null, null}|
1 | {1, null}| 7 | {null, 0}|
0 | {0, null}| 6 | {null, 1}|
6 | {null, 1}| 0 | {0, null}|
7 | {null, 0}| 1 | {1, null}|
3 |{null, null}| 2 | {9, 9}|
5 |{null, null}| 4 | {9, 9}|
3 |{null, null}| 0 | {0, null}|
5 |{null, null}| 1 | {1, null}|
6 | {null, 1}| 2 | {9, 9}|
7 | {null, 0}| 4 | {9, 9}|
2 | {9, 9}| 7 | {null, 0}|
4 | {9, 9}| 6 | {null, 1}|
1 | {1, null}| 3 |{null, null}|
0 | {0, null}| 5 |{null, null}|
+------------+ +------------+
*/
// clang-format on
Expand All @@ -550,31 +555,33 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
std::vector<order> column_order{order::DESCENDING};

// desc_nulls_first
fixed_width_column_wrapper<int32_t> expected1{{2, 4, 1, 0, 6, 7, 3, 5}};
fixed_width_column_wrapper<int32_t> expected1{{3, 5, 6, 7, 2, 4, 1, 0}};
auto got = sorted_order(input, column_order, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected1, column_order, {null_order::AFTER});

// desc_nulls_last
fixed_width_column_wrapper<int32_t> expected2{{2, 4, 1, 0, 6, 7, 3, 5}};
got = sorted_order(input, column_order, {null_order::BEFORE});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected1, column_order, {null_order::BEFORE});
run_sort_test(input, expected2, column_order, {null_order::BEFORE});

// asce_nulls_first
std::vector<order> column_order2{order::ASCENDING};
fixed_width_column_wrapper<int32_t> expected2{{3, 5, 7, 6, 0, 1, 2, 4}};
fixed_width_column_wrapper<int32_t> expected3{{3, 5, 7, 6, 0, 1, 2, 4}};
got = sorted_order(input, column_order2, {null_order::BEFORE});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected2, column_order2, {null_order::BEFORE});
run_sort_test(input, expected3, column_order2, {null_order::BEFORE});

// asce_nulls_last
fixed_width_column_wrapper<int32_t> expected4{{0, 1, 2, 4, 7, 6, 3, 5}};
got = sorted_order(input, column_order2, {null_order::AFTER});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
// Run test for sort and sort_by_key
run_sort_test(input, expected2, column_order2, {null_order::AFTER});
run_sort_test(input, expected4, column_order2, {null_order::AFTER});
}

TYPED_TEST(Sort, Stable)
Expand Down

0 comments on commit dd5eecd

Please sign in to comment.