From 2a4451f583c1f15be3bc17d0725ed0d71b7ab70d Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 16 Oct 2024 13:05:52 +0200 Subject: [PATCH] pull up pushed down filters from table scan when replacing with index scan --- .../index/rtree/rtree_index_plan_scan.cpp | 64 ++++++++++++++---- test/data/segments.parquet | Bin 0 -> 1791 bytes test/sql/index/rtree_filter_pullup.test | 17 +++++ 3 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 test/data/segments.parquet create mode 100644 test/sql/index/rtree_filter_pullup.test diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp b/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp index ac8e26f..7a531b5 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp +++ b/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp @@ -1,9 +1,12 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/optimizer/column_lifetime_analyzer.hpp" +#include "duckdb/optimizer/matcher/expression_matcher.hpp" +#include "duckdb/optimizer/matcher/function_matcher.hpp" #include "duckdb/optimizer/optimizer_extension.hpp" #include "duckdb/optimizer/remove_unused_columns.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/operator/logical_filter.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/operator_extension.hpp" @@ -11,14 +14,15 @@ #include "spatial/core/geometry/bbox.hpp" #include "spatial/core/geometry/geometry_type.hpp" #include "spatial/core/index/rtree/rtree_index.hpp" +#include "spatial/core/index/rtree/rtree_index_create_logical.hpp" #include "spatial/core/index/rtree/rtree_index_scan.hpp" #include "spatial/core/index/rtree/rtree_module.hpp" #include "spatial/core/types.hpp" #include "spatial/core/util/math.hpp" -#include "duckdb/optimizer/matcher/expression_matcher.hpp" -#include "duckdb/optimizer/matcher/function_matcher.hpp" -#include "spatial/core/index/rtree/rtree_index_create_logical.hpp" +#include +#include +#include namespace spatial { @@ -95,7 +99,7 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { return true; } - static bool TryOptimize(ClientContext &context, unique_ptr &plan) { + static bool TryOptimize(Binder &binder, ClientContext &context, unique_ptr &plan, unique_ptr &root) { // Look for a FILTER with a spatial predicate followed by a LOGICAL_GET table scan auto &op = *plan; @@ -116,7 +120,8 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { if (filter.children.front()->type != LogicalOperatorType::LOGICAL_GET) { return false; } - auto &get = filter.children.front()->Cast(); + auto &get_ptr = filter.children.front(); + auto &get = get_ptr->Cast(); if (get.function.name != "seq_scan") { return false; } @@ -126,10 +131,6 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { return false; } - if(!get.table_filters.filters.empty()) { - return false; - } - // We can replace the scan function with a rtree index scan (if the table has a rtree index) // Get the table auto &table = *get.GetTable(); @@ -191,24 +192,57 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { return false; } - // Replace the scan with our custom index scan function + // If there are no table filters pushed down into the get, we can just replace the get with the index scan + const auto cardinality = get.function.cardinality(context, bind_data.get()); get.function = RTreeIndexScanFunction::GetFunction(); - auto cardinality = get.function.cardinality(context, bind_data.get()); get.has_estimated_cardinality = cardinality->has_estimated_cardinality; get.estimated_cardinality = cardinality->estimated_cardinality; get.bind_data = std::move(bind_data); - + if(get.table_filters.filters.empty()) { + return true; + } + get.projection_ids.clear(); + get.types.clear(); + + // Otherwise, things get more complicated. We need to pullup the filters from the table scan as our index scan + // does not support regular filter pushdown. + auto new_filter = make_uniq(); + auto &column_ids = get.GetColumnIds(); + for(const auto &entry : get.table_filters.filters) { + idx_t column_id = entry.first; + auto &type = get.returned_types[column_id]; + bool found = false; + for(idx_t i = 0; i < column_ids.size(); i++) { + if (column_ids[i] == column_id) { + column_id = i; + found = true; + break; + } + } + if (!found) { + throw InternalException("Could not find column id for filter"); + } + auto column = make_uniq(type, ColumnBinding(get.table_index, column_id)); + new_filter->expressions.push_back(entry.second->ToExpression(*column)); + } + new_filter->children.push_back(std::move(get_ptr)); + new_filter->ResolveOperatorTypes(); + get_ptr = std::move(new_filter); return true; } - static void Optimize(OptimizerExtensionInput &input, unique_ptr &plan) { - if (!TryOptimize(input.context, plan)) { + static void OptimizeRecursive(OptimizerExtensionInput &input, unique_ptr &plan, unique_ptr &root) { + if (!TryOptimize(input.optimizer.binder, input.context, plan, root)) { // No match: continue with the children for (auto &child : plan->children) { - Optimize(input, child); + OptimizeRecursive(input, child, root); } } } + + static void Optimize(OptimizerExtensionInput &input, unique_ptr &plan) { + OptimizeRecursive(input, plan, plan); + } }; //----------------------------------------------------------------------------- diff --git a/test/data/segments.parquet b/test/data/segments.parquet new file mode 100644 index 0000000000000000000000000000000000000000..64e93d8b12ba7aa812f49e33bc4257106749ae5f GIT binary patch literal 1791 zcmdUwYitx%6vywKU3a&3DlKw{8MB%)*)CAqb!K*6OSVc&qr_HeY6T6gWgd5MZM$1{ z+iJx|h*8rBen1Hz@rBXE!UsZ3@R?8piSg|R4VutkgE1OInou>dA$n#f3q|tURgRKumZ zsi}%YEhTMLB~Yd=fzm=EVabxDXc^bAA*jP)V6olvBsJ|)!?G>Kw!Mr&Hzv?}!}F-? z*pA}bl4q9xrfog)OBKRmZbVJi!StVg|FX`@tS;E9t?yG`xi8_V6 zYZb?<0gAMgb7_7gXAMTnx#C#Pp|C8GB>nZ(2J5R^8!V4u5Q7;ej2Q%#Yss=s!uXYA z@vvX4V7V}KDd83kt*Q@(VirN}(Lm$1@1c=68EHE zN#NSKbw3-Y=eGcVIQH7n5af5<^nlb4T-^8ixXlWOH@9vheIt&$CmH3ZR-L2hZZ>xeX&vyuExm@UJuI zLNDl6{y2zl1b(%3*<%*)0ev9rFK5MDYz+lIMD`!90-o60Uj{dNst? zxTDU1YxkF&hv|?-Vu-K}4A11;1{B<0=L}ksW`Ct4gyX*!x|%b5r(6qX_?Gmo;X9?` z4SscbDpzj7E4L~VKgZj|F))k_cZSm++2Puh&Oe#fvz&7-!~Y@H;bLQJiTwE z=!^QAMB6Sr4$)>)HFVR^Qktenx-Ln2lWJr%Lr$ku)yzl`ZYpcilAJOWC9SEZZulBq hR