Skip to content

Commit

Permalink
Fix empty reduce with List output and non-List input (#10435)
Browse files Browse the repository at this point in the history
For certain aggregation operations, such as: `collect_list` or `collect_set`, their output types are List, while their input might not be List.  We need to handcraft the the default scalar for this scenario, since `make_default_constructed_scalar` can not create list scalars.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)

URL: #10435
  • Loading branch information
sperlingxx authored Mar 17, 2022
1 parent 87180ce commit 94e9f58
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
9 changes: 9 additions & 0 deletions cpp/src/reductions/reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,17 @@ std::unique_ptr<scalar> reduce(
// handcraft the default scalar with input column.
if (col.size() <= col.null_count()) {
if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) {
// Under some circumstance, the output type will become the List of input type,
// such as: collect_list or collect_set. So, we have to handcraft the default scalar.
if (output_dtype.id() == type_id::LIST) {
auto scalar = make_list_scalar(empty_like(col)->view(), stream, mr);
scalar->set_valid_async(false, stream);
return scalar;
}

return make_default_constructed_scalar(output_dtype, stream, mr);
}

return make_empty_scalar_like(col, stream, mr);
}

Expand Down
25 changes: 25 additions & 0 deletions cpp/tests/reductions/collect_ops_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,4 +325,29 @@ TEST_F(CollectTest, CollectStrings)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected7, dynamic_cast<list_scalar*>(ret7.get())->view());
}

TEST_F(CollectTest, CollectEmptys)
{
using int_col = cudf::test::fixed_width_column_wrapper<int32_t>;

// test collect empty columns
auto empty = int_col{};
auto ret = cudf::reduce(
empty, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());

ret = cudf::reduce(
empty, make_collect_set_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());

// test collect all null columns
auto all_nulls = int_col{{1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}};
ret = cudf::reduce(
all_nulls, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());

ret = cudf::reduce(
all_nulls, make_collect_set_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());
}

} // namespace cudf::test

0 comments on commit 94e9f58

Please sign in to comment.