Skip to content

Commit

Permalink
pull up pushed down filters from table scan when replacing with index…
Browse files Browse the repository at this point in the history
… scan
  • Loading branch information
Maxxen committed Oct 16, 2024
1 parent ded0c5b commit 2a4451f
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 15 deletions.
64 changes: 49 additions & 15 deletions spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
#include "duckdb/optimizer/column_lifetime_analyzer.hpp"
#include "duckdb/optimizer/matcher/expression_matcher.hpp"
#include "duckdb/optimizer/matcher/function_matcher.hpp"
#include "duckdb/optimizer/optimizer_extension.hpp"
#include "duckdb/optimizer/remove_unused_columns.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/operator/logical_filter.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/planner/operator_extension.hpp"
#include "duckdb/storage/data_table.hpp"
#include "spatial/core/geometry/bbox.hpp"
#include "spatial/core/geometry/geometry_type.hpp"
#include "spatial/core/index/rtree/rtree_index.hpp"
#include "spatial/core/index/rtree/rtree_index_create_logical.hpp"
#include "spatial/core/index/rtree/rtree_index_scan.hpp"
#include "spatial/core/index/rtree/rtree_module.hpp"
#include "spatial/core/types.hpp"
#include "spatial/core/util/math.hpp"

#include "duckdb/optimizer/matcher/expression_matcher.hpp"
#include "duckdb/optimizer/matcher/function_matcher.hpp"
#include "spatial/core/index/rtree/rtree_index_create_logical.hpp"
#include <duckdb/optimizer/column_binding_replacer.hpp>
#include <duckdb/optimizer/optimizer.hpp>
#include <duckdb/planner/operator/logical_projection.hpp>

namespace spatial {

Expand Down Expand Up @@ -95,7 +99,7 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
return true;
}

static bool TryOptimize(ClientContext &context, unique_ptr<LogicalOperator> &plan) {
static bool TryOptimize(Binder &binder, ClientContext &context, unique_ptr<LogicalOperator> &plan, unique_ptr<LogicalOperator> &root) {
// Look for a FILTER with a spatial predicate followed by a LOGICAL_GET table scan
auto &op = *plan;

Expand All @@ -116,7 +120,8 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
if (filter.children.front()->type != LogicalOperatorType::LOGICAL_GET) {
return false;
}
auto &get = filter.children.front()->Cast<LogicalGet>();
auto &get_ptr = filter.children.front();
auto &get = get_ptr->Cast<LogicalGet>();
if (get.function.name != "seq_scan") {
return false;
}
Expand All @@ -126,10 +131,6 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
return false;
}

if(!get.table_filters.filters.empty()) {
return false;
}

// We can replace the scan function with a rtree index scan (if the table has a rtree index)
// Get the table
auto &table = *get.GetTable();
Expand Down Expand Up @@ -191,24 +192,57 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
return false;
}

// Replace the scan with our custom index scan function
// If there are no table filters pushed down into the get, we can just replace the get with the index scan
const auto cardinality = get.function.cardinality(context, bind_data.get());
get.function = RTreeIndexScanFunction::GetFunction();
auto cardinality = get.function.cardinality(context, bind_data.get());
get.has_estimated_cardinality = cardinality->has_estimated_cardinality;
get.estimated_cardinality = cardinality->estimated_cardinality;
get.bind_data = std::move(bind_data);

if(get.table_filters.filters.empty()) {
return true;
}
get.projection_ids.clear();
get.types.clear();

// Otherwise, things get more complicated. We need to pullup the filters from the table scan as our index scan
// does not support regular filter pushdown.
auto new_filter = make_uniq<LogicalFilter>();
auto &column_ids = get.GetColumnIds();
for(const auto &entry : get.table_filters.filters) {
idx_t column_id = entry.first;
auto &type = get.returned_types[column_id];
bool found = false;
for(idx_t i = 0; i < column_ids.size(); i++) {
if (column_ids[i] == column_id) {
column_id = i;
found = true;
break;
}
}
if (!found) {
throw InternalException("Could not find column id for filter");
}
auto column = make_uniq<BoundColumnRefExpression>(type, ColumnBinding(get.table_index, column_id));
new_filter->expressions.push_back(entry.second->ToExpression(*column));
}
new_filter->children.push_back(std::move(get_ptr));
new_filter->ResolveOperatorTypes();
get_ptr = std::move(new_filter);
return true;
}

static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
if (!TryOptimize(input.context, plan)) {
static void OptimizeRecursive(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan, unique_ptr<LogicalOperator> &root) {
if (!TryOptimize(input.optimizer.binder, input.context, plan, root)) {
// No match: continue with the children
for (auto &child : plan->children) {
Optimize(input, child);
OptimizeRecursive(input, child, root);
}
}
}

static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
OptimizeRecursive(input, plan, plan);
}
};

//-----------------------------------------------------------------------------
Expand Down
Binary file added test/data/segments.parquet
Binary file not shown.
17 changes: 17 additions & 0 deletions test/sql/index/rtree_filter_pullup.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require parquet

query I rowsort
SELECT id FROM '__WORKING_DIRECTORY__/test/data/segments.parquet'
WHERE subtype='road' AND ST_Intersects(geometry, ST_Buffer(ST_GeomFromText('POINT (-8476562 4795814)'), 100));
----
0862aac667ffffff043df7e4c6756d14
0862aac667ffffff047de7f2111f86ad
0862aac667ffffff047ffedcbc2db0f8


query III rowsort
SELECT id, subtype, class FROM '__WORKING_DIRECTORY__/test/data/segments.parquet'
WHERE subtype='road' AND class='residential' AND ST_Intersects(geometry, ST_Buffer(ST_GeomFromText('POINT (-8476562 4795814)'), 100));
----
0862aac667ffffff047de7f2111f86ad road residential
0862aac667ffffff047ffedcbc2db0f8 road residential

0 comments on commit 2a4451f

Please sign in to comment.