Skip to content

Commit

Permalink
Improve topN with projection pullup optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
kryonix committed Apr 4, 2024
1 parent 4652c82 commit 9b43333
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 62 deletions.
99 changes: 37 additions & 62 deletions src/optimizer/topn_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
namespace duckdb {

bool TopN::CanOptimize(LogicalOperator &op) {
if (op.type == LogicalOperatorType::LOGICAL_LIMIT &&
op.children[0]->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
if (op.type == LogicalOperatorType::LOGICAL_LIMIT) {
auto &limit = op.Cast<LogicalLimit>();

if (limit.limit_val.Type() != LimitNodeType::CONSTANT_VALUE) {
Expand All @@ -20,13 +19,41 @@ bool TopN::CanOptimize(LogicalOperator &op) {
// we need offset to be either not set (i.e. limit without offset) OR have offset be a constant value
return false;
}
return true;

auto child_op = op.children[0].get();

while (child_op->type == LogicalOperatorType::LOGICAL_PROJECTION) {
D_ASSERT(!child_op->children.empty());
child_op = child_op->children[0].get();
}

return child_op->type == LogicalOperatorType::LOGICAL_ORDER_BY;
}
return false;
}

unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
if (CanOptimize(*op)) {

vector<unique_ptr<LogicalOperator>> projections;

if (op->children[0]->type != LogicalOperatorType::LOGICAL_ORDER_BY) {
// traverse operator tree and collect all projection nodes until we reach
// the order by operator

auto child = std::move(op->children[0]);
// collect all projections until we get to the order by
while (child->type == LogicalOperatorType::LOGICAL_PROJECTION) {
D_ASSERT(!child->children.empty());
auto tmp = std::move(child->children[0]);
projections.push_back(std::move(child));
child = std::move(tmp);
}

// Move order by operator into children of limit operator
op->children[0] = std::move(child);
}

auto &limit = op->Cast<LogicalLimit>();
auto &order_by = (op->children[0])->Cast<LogicalOrder>();
auto limit_val = int64_t(limit.limit_val.GetConstantValue());
Expand All @@ -37,67 +64,15 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
auto topn = make_uniq<LogicalTopN>(std::move(order_by.orders), limit_val, offset_val);
topn->AddChild(std::move(order_by.children[0]));
op = std::move(topn);
} else {
// Check if we can push the limit operator down through projection operators
if (op->type == LogicalOperatorType::LOGICAL_LIMIT) {
auto &limit = op->Cast<LogicalLimit>();
if (limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE &&
limit.offset_val.Type() != LimitNodeType::EXPRESSION_VALUE) {
auto child_op = op->children[0].get();
bool can_optimize = false;

// Check if there are only projection operators between the limit operator
// and an order by operator.
while (child_op && !can_optimize) {
switch (child_op->type) {
case LogicalOperatorType::LOGICAL_PROJECTION:
child_op = child_op->children[0].get();
if (child_op->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
can_optimize = true;
}
break;
default:
child_op = nullptr;
can_optimize = false;
break;
}
}

if (can_optimize) {
// traverse operator tree and collect all projection nodes until we reach
// the order by operator
vector<unique_ptr<LogicalOperator>> projections;
projections.push_back(std::move(op->children[0]));
while (true) {
auto child = projections.back().get();
if (child->type != LogicalOperatorType::LOGICAL_PROJECTION) {
break;
}
projections.push_back(std::move(child->children[0]));
}

// Move order by operator into children of limit operator
auto order_by = std::move(projections.back());
projections.pop_back();
op->children.clear();
op->children.push_back(std::move(order_by));

// reconstruct all projection nodes above limit operator
unique_ptr<LogicalOperator> root = std::move(op);
while (!projections.empty()) {
unique_ptr<LogicalOperator> node = std::move(projections.back());
node->children.clear();
node->children.push_back(std::move(root));
root = std::move(node);
projections.pop_back();
}

// recurse into Optimize function to apply TopN optimization
return Optimize(std::move(root));
}
}
// reconstruct all projection nodes above limit operator
while (!projections.empty()) {
auto node = std::move(projections.back());
node->children[0] = std::move(op);
op = std::move(node);
projections.pop_back();
}

} else {
for (auto &child : op->children) {
child = Optimize(std::move(child));
}
Expand Down
14 changes: 14 additions & 0 deletions test/optimizer/topn_optimizer.test
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,17 @@ query II
EXPLAIN SELECT * FROM (SELECT * FROM range(100000000) AS _(x) ORDER BY x) AS cte LIMIT 10
----
logical_opt <REGEX>:.*TOP_N.*

# top n optimization with more complex projection pull up
query II
EXPLAIN
WITH cte1 AS (SELECT range%2000 a,
(range%2000 + 2000) b,
(range%2000 + 4000) c,
(range%2000 + 6000) d
FROM range(10000) ORDER BY range),
cte2 as (select *, a as x, b as y, c as z from cte1),
cte3 as (select *, a as l, b as m, c as n, d as o from cte2)
SELECT * FROM cte3 LIMIT 4;
----
logical_opt <REGEX>:.*TOP_N.*

0 comments on commit 9b43333

Please sign in to comment.