Skip to content

Commit

Permalink
[Exec](expr) Opt the compound pred performance (apache#45414)
Browse files Browse the repository at this point in the history
before:
```
 mysqlslap -hd3 -uroot -P9130  --create-schema=test_db2 -c 10 -i 500 -q "SELECT count(k) FROM sbtest1_dup WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
Benchmark
	Average number of seconds to run all queries: 0.041 seconds
	Minimum number of seconds to run all queries: 0.037 seconds
	Maximum number of seconds to run all queries: 0.115 seconds
	Number of clients running queries: 10
	Average number of queries per client: 1
```

after:
```
mysqlslap -hd3 -uroot -P9030  --create-schema=test_db -c 10 -i 500 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
Benchmark
	Average number of seconds to run all queries: 0.029 seconds
	Minimum number of seconds to run all queries: 0.027 seconds
	Maximum number of seconds to run all queries: 0.034 seconds
	Number of clients running queries: 10
	Average number of queries per client: 1
```
  • Loading branch information
HappenLee authored Dec 16, 2024
1 parent b74b5c6 commit 0c97e04
Showing 1 changed file with 60 additions and 22 deletions.
82 changes: 60 additions & 22 deletions be/src/vec/exprs/vcompound_pred.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,15 @@ class VCompoundPred : public VectorizedFnCall {
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
return Status::OK();
}
if (get_num_children() == 1 || !_all_child_is_compound_and_not_const()) {
if (get_num_children() == 1 || _has_const_child()) {
return VectorizedFnCall::execute(context, block, result_column_id);
}

int lhs_id = -1;
int rhs_id = -1;
bool lhs_mem_can_reuse = _children[0]->is_compound_predicate();
bool rhs_mem_can_reuse = _children[1]->is_compound_predicate();

RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
ColumnPtr lhs_column =
block->get_by_position(lhs_id).column->convert_to_full_column_if_const();
Expand Down Expand Up @@ -210,13 +213,22 @@ class VCompoundPred : public VectorizedFnCall {
return Status::OK();
};

auto return_result_column_id = [&](ColumnPtr res_column, int res_id) -> int {
// Publishes `res_column` as the result of this predicate and returns the
// block position that holds it.
//   res_column: the child column chosen as the result (taken by value so it
//               can be replaced by a private copy).
//   res_id:     the block position that currently holds `res_column`.
//   mem_reuse:  when false, the column is copied via clone_resized(size) and
//               the copy is what ends up in the block under a new position.
// If the result must be nullable but the chosen column is not, the column is
// wrapped in a ColumnNullable whose null map is all zeros and inserted at a
// new position.
auto return_result_column_id = [&](ColumnPtr res_column, int res_id,
                                   bool mem_reuse) -> int {
    if (!mem_reuse) {
        res_column = res_column->clone_resized(size);
    }

    const bool needs_nullable_wrap = result_is_nullable && !res_column->is_nullable();
    if (needs_nullable_wrap) {
        auto wrapped_column =
                ColumnNullable::create(res_column, ColumnUInt8::create(size, 0));
        res_id = block->columns();
        block->insert({std::move(wrapped_column), _data_type, _expr_name});
        return res_id;
    }

    if (mem_reuse) {
        // The column already in the block is handed back unchanged.
        return res_id;
    }

    // Append the private copy made above as a new block column.
    res_id = block->columns();
    block->insert({std::move(res_column), _data_type, _expr_name});
    return res_id;
};

Expand All @@ -231,6 +243,33 @@ class VCompoundPred : public VectorizedFnCall {
return null_map_data;
};

// Element-wise AND (is_and_op == true) or OR (is_and_op == false) of the two
// child data buffers, accumulated in place into `lhs_data_column`.
// The destination buffer is picked as follows:
//   * lhs reusable  -> write into the lhs column, result id is lhs_id;
//   * rhs reusable  -> swap the two data pointers so that the rhs buffer
//                      becomes the destination, result id is rhs_id;
//   * neither       -> clone the lhs column, insert the clone into the block
//                      and write into the clone.
// NOTE(review): the reuse flags are set from is_compound_predicate() on the
// children; presumably only those outputs are safe to overwrite - confirm.
auto vector_vector = [&]<bool is_and_op>() {
    if (!lhs_mem_can_reuse && !rhs_mem_can_reuse) {
        *result_column_id = block->columns();

        auto cloned_lhs = lhs_column->clone_resized(size);
        lhs_data_column = assert_cast<ColumnUInt8*>(cloned_lhs.get())->get_data().data();
        block->insert({std::move(cloned_lhs), _data_type, _expr_name});
    } else if (lhs_mem_can_reuse) {
        *result_column_id = lhs_id;
    } else {
        *result_column_id = rhs_id;

        // Exchange the pointers: from here on "lhs" names the rhs buffer.
        auto previous_rhs = rhs_data_column;
        rhs_data_column = lhs_data_column;
        lhs_data_column = previous_rhs;
    }

    for (size_t row = 0; row < size; ++row) {
        if constexpr (is_and_op) {
            lhs_data_column[row] &= rhs_data_column[row];
        } else {
            lhs_data_column[row] |= rhs_data_column[row];
        }
    }
};
auto vector_vector_null = [&]<bool is_and_op>() {
auto col_res = ColumnUInt8::create(size);
auto col_nulls = ColumnUInt8::create(size);
Expand Down Expand Up @@ -269,28 +308,28 @@ class VCompoundPred : public VectorizedFnCall {
//2. nullable column: null map all is not null
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) {
// false and any = false, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());

if ((lhs_all_true && !lhs_is_nullable) || //not null column
(lhs_all_true && lhs_all_is_not_null)) { //nullable column
// true and any = any, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any and false = false, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any and true = any, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id =
return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
*result_column_id = lhs_id;
for (size_t i = 0; i < size; i++) {
lhs_data_column[i] &= rhs_data_column[i];
}
vector_vector.template operator()<true>();
} else {
vector_vector_null.template operator()<true>();
}
Expand All @@ -301,26 +340,26 @@ class VCompoundPred : public VectorizedFnCall {
// false or NULL ----> NULL
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) {
// true or any = true, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) {
// false or any = any, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any or true = true, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any or false = any, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id =
return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
*result_column_id = lhs_id;
for (size_t i = 0; i < size; i++) {
lhs_data_column[i] |= rhs_data_column[i];
}
vector_vector.template operator()<false>();
} else {
vector_vector_null.template operator()<false>();
}
Expand All @@ -345,10 +384,9 @@ class VCompoundPred : public VectorizedFnCall {
return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null ^ b));
}

bool _all_child_is_compound_and_not_const() const {
return std::ranges::all_of(_children, [](const VExprSPtr& arg) -> bool {
return arg->is_compound_predicate() && !arg->is_constant();
});
bool _has_const_child() const {
return std::ranges::any_of(_children,
[](const VExprSPtr& arg) -> bool { return arg->is_constant(); });
}

std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,
Expand Down

0 comments on commit 0c97e04

Please sign in to comment.