Skip to content

Commit

Permalink
fix(agg_ir): avg/sum should check null before return
Browse files Browse the repository at this point in the history
this is also part of #1586
  • Loading branch information
aceforeverd committed Jun 17, 2022
1 parent 94345d3 commit 1e720c6
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 37 deletions.
56 changes: 56 additions & 0 deletions cases/query/udaf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,59 @@ cases:
3, 5, 5, 5, 5, 1
4, 5, 5, 5, 5, 2
5, 5, 5, 5, 5, 3
- id: 3
desc: correctness for codegen loop combining
inputs:
- name: t1
columns:
- id int
- key1 string
- l1 int64
- l2 int64
- f1 float
- d1 double
- ts timestamp
indexs:
- idx:key1:ts
data: |
1, g1, null, -3, null, 2.0, 1590738990000
2, g1, 1, null, 2.0, 2.0, 1590738990001
3, g1, 2, -1, null, 2.0, 1590738990002
4, g2, null, null, 4.4, null, 1590738990002
5, g2, null, -2, 9.0, 4.0, 1590738990003
6, g2, 1, 1, 7.0, 4.0, 1590738990004
7, g2, 1, null, null, null, 1590738990005
8, g2, null, null, null, 7.0, 1590738990006
sql: |
select
id,
key1,
count(id) over w as c1,
count(l1) over w as c2,
max(`l1`) over w as mi,
min(`l2`) over w as ma,
sum(f1) over w as sum,
avg(d1) over w as av
from t1
window w as (partition by `key1` order by `ts` rows_range between 5s open preceding and 0s preceding maxsize 10);
expect:
columns:
- id int
- key1 string
- c1 int64
- c2 int64
- mi int64
- ma int64
- sum float
- av double
order: id
data: |
1, g1, 1, 0, null, -3, null, 2.0
2, g1, 2, 1, 1, -3, 2.0, 2.0
3, g1, 3, 2, 2, -3, 2.0, 2.0
4, g2, 1, 0, null, null, 4.4, null
5, g2, 2, 0, null, -2, 13.4, 4.0
6, g2, 3, 1, 1, -2, 20.4, 4.0
7, g2, 4, 2, 1, -2, 20.4, 4.0
8, g2, 5, 2, 1, -2, 20.4, 5.0
59 changes: 24 additions & 35 deletions hybridse/src/codegen/aggregate_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,34 +232,25 @@ class StatisticalAggGenerator {

void GenInitState(::llvm::IRBuilder<>* builder) {
for (size_t i = 0; i < col_num_; ++i) {
// any of min/max/avg/sum/count need the count info
count_state_[i] = GenCountInitState(builder);

if (!sum_idxs_[i].empty()) {
sum_states_[i] = GenSumInitState(builder);
}
if (!avg_idxs_[i].empty()) {
if (col_type_ == ::hybridse::node::kDouble) {
sum_states_[i] = GenSumInitState(builder);
if (sum_states_[i] == nullptr) {
sum_states_[i] = GenSumInitState(builder);
}
} else {
avg_states_[i] = GenAvgInitState(builder);
}
if (count_state_[i] == nullptr) {
count_state_[i] = GenCountInitState(builder);
}
}
if (!count_idxs_[i].empty()) {
if (count_state_[i] == nullptr) {
count_state_[i] = GenCountInitState(builder);
}
}
if (!min_idxs_[i].empty()) {
if (count_state_[i] == nullptr) {
count_state_[i] = GenCountInitState(builder);
}
min_states_[i] = GenMinInitState(builder);
}
if (!max_idxs_[i].empty()) {
if (count_state_[i] == nullptr) {
count_state_[i] = GenCountInitState(builder);
}
max_states_[i] = GenMaxInitState(builder);
}
}
Expand Down Expand Up @@ -334,15 +325,13 @@ class StatisticalAggGenerator {
const std::vector<::llvm::Value*>& inputs,
const std::vector<::llvm::Value*>& is_null) {
for (size_t i = 0; i < col_num_; ++i) {
GenCountUpdate(i, is_null[i], builder);
if (!sum_idxs_[i].empty() || (!avg_idxs_[i].empty() && avg_states_[i] == nullptr)) {
GenSumUpdate(i, inputs[i], is_null[i], builder);
}
if (!avg_idxs_[i].empty() && avg_states_[i] != nullptr) {
GenAvgUpdate(i, inputs[i], is_null[i], builder);
}
if (!avg_idxs_[i].empty() || !count_idxs_[i].empty() || !min_idxs_[i].empty() || !max_idxs_[i].empty()) {
GenCountUpdate(i, is_null[i], builder);
}
if (!min_idxs_[i].empty()) {
GenMinUpdate(i, inputs[i], is_null[i], builder);
}
Expand All @@ -352,49 +341,46 @@ class StatisticalAggGenerator {
}
}

void GenOutputs(::llvm::IRBuilder<>* builder,
std::vector<std::pair<size_t, NativeValue>>* outputs) {
void GenOutputs(::llvm::IRBuilder<>* builder, std::vector<std::pair<size_t, NativeValue>>* outputs) {
for (size_t i = 0; i < col_num_; ++i) {
// cnt always exists
::llvm::Value* cnt = builder->CreateLoad(count_state_[i]);
::llvm::Value* is_empty = builder->CreateICmpEQ(cnt, builder->getInt64(0));

if (!sum_idxs_[i].empty()) {
::llvm::Value* accum = builder->CreateLoad(sum_states_[i]);
for (int idx : sum_idxs_[i]) {
outputs->emplace_back(idx, NativeValue::Create(accum));
outputs->emplace_back(idx, NativeValue::CreateWithFlag(accum, is_empty));
}
}
::llvm::Value* cnt = nullptr;
if (count_state_[i] != nullptr) {
cnt = builder->CreateLoad(count_state_[i]);
}

if (!avg_idxs_[i].empty()) {
::llvm::Type* avg_ty = AggregateIRBuilder::GetOutputLlvmType(
builder->getContext(), "avg", col_type_);
::llvm::Type* avg_ty = AggregateIRBuilder::GetOutputLlvmType(builder->getContext(), "avg", col_type_);
::llvm::Value* sum;
if (avg_states_[i] == nullptr) {
sum = builder->CreateLoad(sum_states_[i]);
} else {
sum = builder->CreateLoad(avg_states_[i]);
}
::llvm::Value* avg = builder->CreateFDiv(
sum, builder->CreateSIToFP(cnt, avg_ty));
::llvm::Value* raw_avg = builder->CreateFDiv(sum, builder->CreateSIToFP(cnt, avg_ty));
for (int idx : avg_idxs_[i]) {
outputs->emplace_back(idx, NativeValue::Create(avg));
outputs->emplace_back(idx, NativeValue::CreateWithFlag(raw_avg, is_empty));
}
}

if (!count_idxs_[i].empty()) {
for (int idx : count_idxs_[i]) {
outputs->emplace_back(idx, NativeValue::Create(cnt));
}
}
::llvm::Value* is_empty = nullptr;
if (cnt != nullptr) {
is_empty = builder->CreateICmpEQ(cnt, builder->getInt64(0));
}

if (!min_idxs_[i].empty()) {
::llvm::Value* accum = builder->CreateLoad(min_states_[i]);
for (int idx : min_idxs_[i]) {
outputs->emplace_back(idx, NativeValue::CreateWithFlag(accum, is_empty));
}
}

if (!max_idxs_[i].empty()) {
::llvm::Value* accum = builder->CreateLoad(max_states_[i]);
for (int idx : max_idxs_[i]) {
Expand Down Expand Up @@ -442,6 +428,9 @@ class StatisticalAggGenerator {
std::vector<::llvm::Value*> avg_states_;
std::vector<::llvm::Value*> min_states_;
std::vector<::llvm::Value*> max_states_;
// the count of non-null values of corresponding column
// represent the result of `count(col)`
// and help sum/avg/min/max to determine if `NULL` should returned
std::vector<::llvm::Value*> count_state_;
};

Expand Down Expand Up @@ -539,7 +528,7 @@ base::Status ScheduleAggGenerators(
StatisticalAggGenerator agg_gen(cur_col_type, agg_gen_col_seq);
for (size_t i = 0; i < agg_gen_col_seq.size(); ++i) {
auto& geninfo = agg_col_infos[agg_gen_col_seq[i]];
geninfo.Show();
DLOG(INFO) << geninfo.DebugString();
for (size_t j = 0; j < geninfo.GetOutputNum(); j++) {
auto& fname = geninfo.agg_funcs[j];
size_t out_idx = geninfo.output_idxs[j];
Expand Down
5 changes: 3 additions & 2 deletions hybridse/src/codegen/aggregate_ir_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ struct AggColumnInfo {

size_t GetOutputNum() const { return output_idxs.size(); }

void Show() {
std::string DebugString() const {
std::stringstream ss;
ss << DataTypeName(col_type) << " " << GetColKey() << ": [";
for (size_t i = 0; i < GetOutputNum(); ++i) {
Expand All @@ -73,7 +73,8 @@ struct AggColumnInfo {
ss << ", ";
}
}
ss << "]\n";
ss << "]";
return ss.str();
}
};

Expand Down

0 comments on commit 1e720c6

Please sign in to comment.