Skip to content

Commit

Permalink
Improve TopN optimization pass
Browse files Browse the repository at this point in the history
Remove TopN optimization from physical plan mapping

Add TopN optimization test

Add TopN optimization benchmark
  • Loading branch information
kryonix committed Mar 26, 2024
1 parent dda1943 commit 43c1dfb
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 24 deletions.
14 changes: 14 additions & 0 deletions benchmark/micro/optimizer/topn_optimization.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# name: benchmark/micro/optimizer/topn_optimization.benchmark
# description: Benchmark of top n optimization
# group: [optimizer]
#
# The load step creates `integers` with the 100,000,000 values produced by range() and
# `other_table` with five hand-picked values. The timed query keeps the rows of `integers`
# whose value appears in `other_table`, orders them by i and returns the first four, i.e. an
# ORDER BY followed by a constant LIMIT on top of a semi-join style filter.

name TopN Optimization
group micro
subgroup optimizer

load
CREATE TABLE integers AS SELECT * FROM range(100000000) tbl(i);
CREATE TABLE other_table AS SELECT 337 i UNION ALL SELECT 948247 UNION ALL SELECT 17797934 UNION ALL SELECT 99999998 UNION ALL SELECT 99999999

run
SELECT * FROM integers WHERE i IN (SELECT * FROM other_table) ORDER BY i LIMIT 4
24 changes: 0 additions & 24 deletions src/execution/physical_plan/plan_limit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalLimit &op)

auto plan = CreatePlan(*op.children[0]);

if (plan->type == PhysicalOperatorType::ORDER_BY && op.limit_val.Type() == LimitNodeType::CONSTANT_VALUE &&
op.offset_val.Type() != LimitNodeType::EXPRESSION_VALUE) {
auto &order_by = plan->Cast<PhysicalOrder>();
// Can not use TopN operator if PhysicalOrder uses projections
bool omit_projection = true;
for (idx_t i = 0; i < order_by.projections.size(); i++) {
if (order_by.projections[i] == i) {
continue;
}
omit_projection = false;
break;
}
if (order_by.projections.empty() || omit_projection) {
idx_t offset_val = 0;
if (op.offset_val.Type() == LimitNodeType::CONSTANT_VALUE) {
offset_val = op.offset_val.GetConstantValue();
}
auto top_n = make_uniq<PhysicalTopN>(order_by.children[0]->types, std::move(order_by.orders),
op.limit_val.GetConstantValue(), offset_val, op.estimated_cardinality);
top_n->children.push_back(std::move(order_by.children[0]));
return std::move(top_n);
}
}

unique_ptr<PhysicalOperator> limit;
switch (op.limit_val.Type()) {
case LimitNodeType::EXPRESSION_PERCENTAGE:
Expand Down
60 changes: 60 additions & 0 deletions src/optimizer/topn_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,66 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
topn->AddChild(std::move(order_by.children[0]));
op = std::move(topn);
} else {
// Check if we can push the limit operator down through projection operators
if (op->type == LogicalOperatorType::LOGICAL_LIMIT) {
auto &limit = op->Cast<LogicalLimit>();
if (limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE &&
limit.offset_val.Type() != LimitNodeType::EXPRESSION_VALUE) {
auto child_op = op->children[0].get();
bool can_optimize = false;

// Check if there are only projection operators between the limit operator
// and an order by operator.
while (child_op && !can_optimize) {
switch (child_op->type) {
case LogicalOperatorType::LOGICAL_PROJECTION:
child_op = child_op->children[0].get();
if (child_op->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
can_optimize = true;
}
break;
default:
child_op = nullptr;
can_optimize = false;
break;
}
}

if (can_optimize) {
// traverse operator tree and collect all projection nodes until we reach
// the order by operator
vector<unique_ptr<LogicalOperator>> projections;
projections.push_back(std::move(op->children[0]));
while (true) {
auto child = projections.back().get();
if (child->type != LogicalOperatorType::LOGICAL_PROJECTION) {
break;
}
projections.push_back(std::move(child->children[0]));
}

// Move order by operator into children of limit operator
auto order_by = std::move(projections.back());
projections.pop_back();
op->children.clear();
op->children.push_back(std::move(order_by));

// reconstruct all projection nodes above limit operator
unique_ptr<LogicalOperator> root = std::move(op);
while (!projections.empty()) {
unique_ptr<LogicalOperator> node = std::move(projections.back());
node->children.clear();
node->children.push_back(std::move(root));
root = std::move(node);
projections.pop_back();
}

// recurse into Optimize function to apply TopN optimization
return Optimize(std::move(root));
}
}
}

for (auto &child : op->children) {
child = Optimize(std::move(child));
}
Expand Down
6 changes: 6 additions & 0 deletions test/optimizer/topn_optimizer.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ query II
EXPLAIN SELECT i FROM integers ORDER BY i OFFSET 3
----
logical_opt <!REGEX>:.*TOP_N.*

# limit in the outer query over an ordered subquery: the limit is pushed down through the
# projection that separates it from the ORDER BY, so the pair must be rewritten into a top-n
query II
EXPLAIN SELECT * FROM (SELECT * FROM range(100000000) AS _(x) ORDER BY x) AS cte LIMIT 10
----
logical_opt <REGEX>:.*TOP_N.*

0 comments on commit 43c1dfb

Please sign in to comment.