Skip to content

Commit

Permalink
[feature](array_agg) support array_agg with param is array/map/struct… (
Browse files Browse the repository at this point in the history
#41651)

… (#40697)

This PR adds support for array, map, and struct arguments to the
array_agg function.

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
amorynan authored Oct 10, 2024
1 parent 3120bfb commit 1db0aef
Show file tree
Hide file tree
Showing 5 changed files with 385 additions and 12 deletions.
30 changes: 20 additions & 10 deletions be/src/vec/aggregate_functions/aggregate_function_collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,18 @@ AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const DataTyp
}
}

if (distinct) {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
} else {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
if constexpr (!std::is_same_v<T, void>) {
if (distinct) {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
} else {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
}
}
return nullptr;
}

template <typename HasLimit, typename ShowNull>
Expand All @@ -69,15 +72,21 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n
if (which.is_date_or_datetime()) {
return do_create_agg_function_collect<Int64, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_date_v2()) {
} else if (which.is_date_v2() || which.is_ipv4()) {
return do_create_agg_function_collect<UInt32, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_date_time_v2()) {
} else if (which.is_date_time_v2() || which.is_ipv6()) {
return do_create_agg_function_collect<UInt64, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_string()) {
return do_create_agg_function_collect<StringRef, HasLimit, ShowNull>(
distinct, argument_types, result_is_nullable);
} else {
// Generic fallback for types without a dedicated specialization; ShowNull::value is only true for array_agg
if constexpr (ShowNull::value) {
return do_create_agg_function_collect<void, HasLimit, ShowNull>(
distinct, argument_types, result_is_nullable);
}
}

LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}",
Expand Down Expand Up @@ -107,6 +116,7 @@ AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
}

void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& factory) {
// notice: array_agg only differs from collect_list in that array_agg will show null elements in array
factory.register_function_both("collect_list", create_aggregate_function_collect);
factory.register_function_both("collect_set", create_aggregate_function_collect);
factory.register_function_both("array_agg", create_aggregate_function_collect);
Expand Down
72 changes: 70 additions & 2 deletions be/src/vec/aggregate_functions/aggregate_function_collect.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,71 @@ struct AggregateFunctionArrayAggData<StringRef> {
}
};

template <>
struct AggregateFunctionArrayAggData<void> {
using ElementType = StringRef;
using Self = AggregateFunctionArrayAggData<void>;
MutableColumnPtr column_data;

AggregateFunctionArrayAggData() {}

AggregateFunctionArrayAggData(const DataTypes& argument_types) {
DataTypePtr column_type = argument_types[0];
column_data = column_type->create_column();
}

void add(const IColumn& column, size_t row_num) { column_data->insert_from(column, row_num); }

void deserialize_and_merge(const IColumn& column, size_t row_num) {
auto& to_arr = assert_cast<const ColumnArray&>(column);
auto& to_nested_col = to_arr.get_data();
auto start = to_arr.get_offsets()[row_num - 1];
auto end = start + to_arr.get_offsets()[row_num] - to_arr.get_offsets()[row_num - 1];
for (auto i = start; i < end; ++i) {
column_data->insert_from(to_nested_col, i);
}
}

void reset() { column_data->clear(); }

void insert_result_into(IColumn& to) const {
auto& to_arr = assert_cast<ColumnArray&>(to);
auto& to_nested_col = to_arr.get_data();
size_t num_rows = column_data->size();
for (size_t i = 0; i < num_rows; ++i) {
to_nested_col.insert_from(*column_data, i);
}
to_arr.get_offsets().push_back(to_nested_col.size());
}

void write(BufferWritable& buf) const {
const size_t size = column_data->size();
write_binary(size, buf);
for (size_t i = 0; i < size; i++) {
write_string_binary(column_data->get_data_at(i), buf);
}
}

void read(BufferReadable& buf) {
size_t size = 0;
read_binary(size, buf);
column_data->reserve(size);

StringRef s;
for (size_t i = 0; i < size; i++) {
read_string_binary(s, buf);
column_data->insert_data(s.data, s.size);
}
}

void merge(const Self& rhs) {
const auto size = rhs.column_data->size();
for (size_t i = 0; i < size; i++) {
column_data->insert_from(*rhs.column_data, i);
}
}
};

// ShowNull exists only to support array_agg, which needs to show NULL elements in its result.
// TODO: support ORDER BY sorting for array_agg.
template <typename Data, typename HasLimit, typename ShowNull>
Expand Down Expand Up @@ -539,7 +604,8 @@ class AggregateFunctionCollect

void create(AggregateDataPtr __restrict place) const override {
if constexpr (ShowNull::value) {
if constexpr (IsDecimalNumber<typename Data::ElementType>) {
if constexpr (IsDecimalNumber<typename Data::ElementType> ||
std::is_same_v<Data, AggregateFunctionArrayAggData<void>>) {
new (place) Data(argument_types);
} else {
new (place) Data();
Expand Down Expand Up @@ -708,11 +774,13 @@ class AggregateFunctionCollect

for (size_t i = 0; i < num_rows; ++i) {
col_null->get_null_map_data().push_back(col_src.get_null_map_data()[i]);
if constexpr (std::is_same_v<StringRef, typename Data::ElementType>) {
if constexpr (std::is_same_v<Data, AggregateFunctionArrayAggData<StringRef>>) {
auto& vec = assert_cast<ColumnString&>(col_null->get_nested_column());
const auto& vec_src =
assert_cast<const ColumnString&>(col_src.get_nested_column());
vec.insert_from(vec_src, i);
} else if constexpr (std::is_same_v<Data, AggregateFunctionArrayAggData<void>>) {
to_nested_col.insert_from(col_src.get_nested_column(), i);
} else {
using ColVecType = ColumnVectorOrDecimal<typename Data::ElementType>;
auto& vec = assert_cast<ColVecType&>(col_null->get_nested_column()).get_data();
Expand Down
156 changes: 156 additions & 0 deletions regression-test/data/query_p0/aggregate/array_agg.out
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,159 @@
3 3
3 3

-- !sql_array_agg_array --
1 [["plum", "banana", "apple"], ["grape", "banana", null, "plum", "cherry"], ["apple", "banana", "kiwi", null], ["apple", "banana", "cherry", "kiwi", null], ["cherry", null], null]
10 [null, ["apple", "banana", null, "cherry", "grape"], ["cherry", "berry", null], ["peach"]]
11 [["grape", "apple", "kiwi"], null, null, null]
12 [["melon", "papaya", "kiwi"], ["plum", null, "kiwi", "banana"], ["plum", null, "mango"], ["plum", null]]
13 [["apple", null], null, ["peach", "cherry", "papaya", "kiwi", null], ["plum", null]]
14 [["orange", "mango", "plum"], ["apple", "melon"], ["orange", "grape", null], ["orange", "banana", null]]
15 [null, ["banana", "peach", "plum", null], null, ["strawberry", null]]
16 [["peach", "kiwi", null, "berry"], null, ["plum", "grape", null], ["kiwi", null]]
17 [["banana", "plum", null], ["papaya"], null, ["apple", "kiwi", null, "papaya"]]
18 [["apple", null], null, ["apple", "mango", null], null]
19 [["kiwi", "mango", null], ["pear", "grape"], ["cherry", null, "plum"], ["banana", "mango", "cherry"]]
2 [null, ["apple", null, "banana"], ["orange", "grape", null], null]
20 [["grape", null], ["kiwi", null], ["kiwi", "plum", "orange", null], ["papaya", "orange", "kiwi", null]]
21 [["kiwi", null]]
22 [["orange", "peach", null, "kiwi"]]
23 [["berry", "grape", null]]
24 [null]
25 [["mango", "plum", "apple", null]]
26 [["banana", null]]
27 [["orange", "kiwi", "plum", null]]
28 [null]
29 [["apple", null, "grape", "peach"]]
3 [["mango", null], ["orange"], ["apple", "kiwi", "papaya"], ["plum", "peach", null]]
30 [["kiwi", "banana", null]]
31 [["cherry", "berry", null, "plum"]]
32 [null]
33 [["apple", null, "kiwi", "orange"]]
34 [["grape", "plum", null]]
35 [["banana", null]]
36 [["kiwi", "orange", "plum", null]]
37 [null]
38 [["apple", null]]
39 [["plum", "grape", null]]
4 [["mango", null, "orange", "plum", "berry", "kiwi"], ["orange", "grape", "mango", "berry"], ["plum", "kiwi", null, "peach", "berry"], null]
40 [["banana", "kiwi", "peach", null]]
41 [["grape", null, "plum"]]
42 [["orange", "kiwi", "peach", null]]
43 [null]
44 [["apple", "banana", null]]
45 [["grape", null]]
46 [["plum", "kiwi", null, "orange"]]
47 [null]
48 [["mango", null]]
49 [["kiwi", "plum", "banana", null]]
5 [["peach", "melon", null], ["apple", null, "kiwi"], ["grape", "kiwi", null], null]
50 [null]
6 [null, null, ["cherry", "apple", null, "plum"], null]
7 [["papaya", "cherry", "apple", null], ["melon"], ["melon", null, "papaya", "grape", "kiwi", "berry", null], ["orange", "grape", "kiwi"]]
8 [["plum", "peach", null, "orange"], ["banana", null], ["berry", "cherry"], ["banana", "mango", null]]
9 [["orange", "kiwi", "berry", null, "plum"], ["apple", "kiwi", "plum", null, "mango"], ["kiwi", null], null]

-- !sql_array_agg_map --
1 [{"key5":null}, {"key2":15, "key3":8}, {"key1":10, "key2":5}, {"key1":10, "key2":20}, {"key2":null}, null]
10 [{"key3":5, "key4":null}, {"key1":null, "key6":9}, {"key2":10, "key7":null}, {"key1":10}]
11 [{"key1":9}, {"key4":5, "key5":null}, {"key1":3, "key5":null}, {"key4":null}]
12 [null, {"key4":25}, {"key2":20, "key3":null}, {"key1":null, "key2":5}]
13 [{"key2":null, "key3":7}, null, null, {"key3":null, "key5":10}]
14 [{"key6":5}, {"key5":15, "key6":25}, {"key1":3, "key6":8}, {"key4":3, "key5":null}]
15 [{"key1":18, "key6":22}, {"key2":4}, {"key3":null}, null]
16 [{"key2":20}, {"key2":2}, {"key2":8, "key3":null}, {"key7":7, "key3":null}]
17 [{"key4":8}, {"key6":9, "key7":null}, {"key1":10, "key4":14}, {"key7":null}]
18 [{"key1":11}, {"key1":10, "key2":null}, {"key2":2}, {"key2":null, "key5":10}]
19 [{"key7":9}, {"key1":1, "key2":2, "key3":3}, {"key1":null, "key7":6}, {"key3":7, "key4":null}]
2 [{"key1":null, "key5":25}, {"key1":10, "key2":null, "key3":20}, {"key2":null, "key3":7}, {"key3":null}]
20 [{"key1":null, "key3":6}, {"key1":1, "key9":6}, {"key1":14}, {"key5":3, "key7":null}]
21 [{"key1":10, "key6":2}]
22 [{"key3":null}]
23 [{"key1":8}]
24 [{"key2":15, "key4":null}]
25 [{"key7":18}]
26 [{"key3":12}]
27 [{"key5":10}]
28 [{"key1":14}]
29 [{"key2":4, "key4":null}]
3 [{"key1":12}, {"key1":5}, {"key3":null}, {"key1":5, "key4":null}]
30 [{"key6":6}]
31 [{"key3":null}]
32 [{"key2":9, "key7":null}]
33 [{"key1":7}]
34 [{"key4":20}]
35 [{"key1":12, "key5":null}]
36 [{"key3":11}]
37 [{"key1":null}]
38 [{"key2":3, "key6":9}]
39 [{"key5":8}]
4 [{"key2":30}, null, {"key4":15}, {"key3":7, "key4":null}]
40 [{"key1":15}]
41 [{"key3":7}]
42 [{"key4":5}]
43 [{"key1":2, "key7":null}]
44 [{"key2":14}]
45 [{"key4":12}]
46 [{"key6":10}]
47 [{"key2":null}]
48 [{"key5":9}]
49 [{"key1":13}]
5 [{"key1":10}, {"key1":7, "key2":8}, null, {"key2":8, "key5":null}]
50 [{"key7":8}]
6 [{"key4":7, "key6":null}, {"key1":1, "key2":2, "key3":null, "key4":4}, {"key3":null, "key6":12}, {"key2":null, "key3":25}]
7 [{"key1":12, "key3":6}, null, {"key4":15, "key5":null}, {"key1":5}]
8 [{"key1":6, "key7":12}, {"key2":9}, {"key1":null, "key5":50}, null]
9 [{"key2":null, "key5":40}, null, {"key2":14, "key5":7}, {"key1":10, "key2":20, "key3":30, "key4":40, "key5":50, "key6":60, "key7":null}]

-- !sql_array_agg_struct --
1 [{"id":1}, {"id":1}, {"id":1}, {"id":1}, {"id":1}, null]
10 [{"id":10}, {"id":10}, {"id":10}, {"id":null}]
11 [{"id":11}, {"id":11}, {"id":11}, {"id":null}]
12 [{"id":12}, {"id":12}, {"id":12}, {"id":null}]
13 [{"id":13}, {"id":13}, {"id":13}, {"id":null}]
14 [{"id":14}, {"id":null}, {"id":14}, {"id":null}]
15 [{"id":15}, {"id":null}, {"id":15}, {"id":null}]
16 [{"id":16}, {"id":16}, {"id":16}, {"id":16}]
17 [{"id":17}, {"id":17}, {"id":17}, {"id":17}]
18 [{"id":18}, {"id":null}, {"id":18}, {"id":18}]
19 [{"id":19}, {"id":null}, {"id":19}, {"id":19}]
2 [{"id":2}, {"id":null}, {"id":2}, {"id":2}]
20 [{"id":20}, {"id":20}, {"id":null}, {"id":null}]
21 [{"id":21}]
22 [{"id":22}]
23 [{"id":23}]
24 [{"id":24}]
25 [{"id":25}]
26 [{"id":26}]
27 [{"id":27}]
28 [{"id":28}]
29 [{"id":29}]
3 [{"id":3}, {"id":3}, {"id":3}, {"id":3}]
30 [{"id":30}]
31 [{"id":31}]
32 [{"id":32}]
33 [{"id":33}]
34 [{"id":34}]
35 [{"id":35}]
36 [{"id":36}]
37 [{"id":37}]
38 [{"id":38}]
39 [{"id":39}]
4 [{"id":null}, {"id":4}, {"id":4}, {"id":4}]
40 [{"id":40}]
41 [{"id":41}]
42 [{"id":42}]
43 [{"id":43}]
44 [{"id":44}]
45 [{"id":45}]
46 [{"id":46}]
47 [{"id":47}]
48 [{"id":48}]
49 [{"id":49}]
5 [{"id":5}, {"id":null}, {"id":5}, {"id":5}]
50 [{"id":50}]
6 [{"id":6}, {"id":6}, {"id":6}, {"id":6}]
7 [{"id":null}, {"id":null}, {"id":null}, {"id":7}]
8 [{"id":8}, {"id":8}, {"id":8}, {"id":8}]
9 [{"id":9}, {"id":9}, {"id":9}, {"id":9}]

Loading

0 comments on commit 1db0aef

Please sign in to comment.