diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 16fe4fa556dc..f2d5996e52c7 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -90,5 +90,6 @@ if (NOT WIN32) LOAD_TESTS DONT_LINK GIT_URL https://github.com/duckdb/substrait GIT_TAG 870bab8725d1123905296bfb1f35ce737434e0b3 + APPLY_PATCHES ) endif() diff --git a/.github/patches/extensions/substrait/substrait.patch b/.github/patches/extensions/substrait/substrait.patch new file mode 100644 index 000000000000..481ca475bd29 --- /dev/null +++ b/.github/patches/extensions/substrait/substrait.patch @@ -0,0 +1,38 @@ +diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp +index 03d9778..d2429c6 100644 +--- a/src/to_substrait.cpp ++++ b/src/to_substrait.cpp +@@ -777,8 +777,31 @@ substrait::Rel *DuckDBToSubstrait::TransformLimit(LogicalOperator &dop) { + auto stopn = res->mutable_fetch(); + stopn->set_allocated_input(TransformOp(*dop.children[0])); + +- stopn->set_offset(dlimit.offset_val); +- stopn->set_count(dlimit.limit_val); ++ idx_t limit_val; ++ idx_t offset_val; ++ ++ switch(dlimit.limit_val.Type()) { ++ case LimitNodeType::CONSTANT_VALUE: ++ limit_val = dlimit.limit_val.GetConstantValue(); ++ break; ++ case LimitNodeType::UNSET: ++ limit_val = 2ULL << 62ULL; ++ break; ++ default: ++ throw InternalException("Unsupported limit value type"); ++ } ++ switch(dlimit.offset_val.Type()) { ++ case LimitNodeType::CONSTANT_VALUE: ++ offset_val = dlimit.offset_val.GetConstantValue(); ++ break; ++ case LimitNodeType::UNSET: ++ offset_val = 0; ++ break; ++ default: ++ throw InternalException("Unsupported offset value type"); ++ } ++ stopn->set_offset(offset_val); ++ stopn->set_count(limit_val); + return res; + } + diff --git a/scripts/generate_serialization.py b/scripts/generate_serialization.py index 8d5a0fc60e6a..b876182a0d9f 100644 --- a/scripts/generate_serialization.py +++ b/scripts/generate_serialization.py @@ -103,6 +103,7 @@ 'LogicalType', 'ColumnDefinition', 'BaseStatistics', + 'BoundLimitNode', ] reference_list = ['ClientContext', 'bound_parameter_map_t'] diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index dff8dd1b73e8..9f28e17da3d8 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -2810,6 +2810,44 @@ KeywordCategory EnumUtil::FromString(const char *value) { throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); } +template<> +const char* EnumUtil::ToChars(LimitNodeType value) { + switch(value) { + case LimitNodeType::UNSET: + return "UNSET"; + case LimitNodeType::CONSTANT_VALUE: + return "CONSTANT_VALUE"; + case LimitNodeType::CONSTANT_PERCENTAGE: + return "CONSTANT_PERCENTAGE"; + case LimitNodeType::EXPRESSION_VALUE: + return "EXPRESSION_VALUE"; + case LimitNodeType::EXPRESSION_PERCENTAGE: + return "EXPRESSION_PERCENTAGE"; + default: + throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value)); + } +} + +template<> +LimitNodeType EnumUtil::FromString(const char *value) { + if (StringUtil::Equals(value, "UNSET")) { + return LimitNodeType::UNSET; + } + if (StringUtil::Equals(value, "CONSTANT_VALUE")) { + return LimitNodeType::CONSTANT_VALUE; + } + if (StringUtil::Equals(value, "CONSTANT_PERCENTAGE")) { + return LimitNodeType::CONSTANT_PERCENTAGE; + } + if (StringUtil::Equals(value, "EXPRESSION_VALUE")) { + return LimitNodeType::EXPRESSION_VALUE; + } + if (StringUtil::Equals(value, "EXPRESSION_PERCENTAGE")) { + 
return LimitNodeType::EXPRESSION_PERCENTAGE; + } + throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); +} + template<> const char* EnumUtil::ToChars(LoadType value) { switch(value) { @@ -2865,8 +2903,6 @@ const char* EnumUtil::ToChars(LogicalOperatorType value) { return "LOGICAL_DISTINCT"; case LogicalOperatorType::LOGICAL_SAMPLE: return "LOGICAL_SAMPLE"; - case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: - return "LOGICAL_LIMIT_PERCENT"; case LogicalOperatorType::LOGICAL_PIVOT: return "LOGICAL_PIVOT"; case LogicalOperatorType::LOGICAL_COPY_DATABASE: @@ -3006,9 +3042,6 @@ LogicalOperatorType EnumUtil::FromString(const char *value) if (StringUtil::Equals(value, "LOGICAL_SAMPLE")) { return LogicalOperatorType::LOGICAL_SAMPLE; } - if (StringUtil::Equals(value, "LOGICAL_LIMIT_PERCENT")) { - return LogicalOperatorType::LOGICAL_LIMIT_PERCENT; - } if (StringUtil::Equals(value, "LOGICAL_PIVOT")) { return LogicalOperatorType::LOGICAL_PIVOT; } diff --git a/src/common/enums/logical_operator_type.cpp b/src/common/enums/logical_operator_type.cpp index 495bf59d516e..c2beaae96403 100644 --- a/src/common/enums/logical_operator_type.cpp +++ b/src/common/enums/logical_operator_type.cpp @@ -46,8 +46,6 @@ string LogicalOperatorToString(LogicalOperatorType type) { return "TOP_N"; case LogicalOperatorType::LOGICAL_SAMPLE: return "SAMPLE"; - case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: - return "LIMIT_PERCENT"; case LogicalOperatorType::LOGICAL_COPY_TO_FILE: return "COPY_TO_FILE"; case LogicalOperatorType::LOGICAL_COPY_DATABASE: diff --git a/src/execution/operator/helper/physical_limit.cpp b/src/execution/operator/helper/physical_limit.cpp index 4fd75344b0a6..936cd311c69a 100644 --- a/src/execution/operator/helper/physical_limit.cpp +++ b/src/execution/operator/helper/physical_limit.cpp @@ -8,12 +8,10 @@ namespace duckdb { -PhysicalLimit::PhysicalLimit(vector types, idx_t limit, idx_t offset, - unique_ptr limit_expression, unique_ptr offset_expression, +PhysicalLimit::PhysicalLimit(vector types, BoundLimitNode limit_val_p, BoundLimitNode offset_val_p, idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::LIMIT, std::move(types), estimated_cardinality), limit_value(limit), - offset_value(offset), limit_expression(std::move(limit_expression)), - offset_expression(std::move(offset_expression)) { + : PhysicalOperator(PhysicalOperatorType::LIMIT, std::move(types), estimated_cardinality), + limit_val(std::move(limit_val_p)), offset_val(std::move(offset_val_p)) { } //===--------------------------------------------------------------------===// @@ -36,13 +34,12 @@ class LimitLocalState : public LocalSinkState { public: explicit LimitLocalState(ClientContext &context, const PhysicalLimit &op) : current_offset(0), data(context, op.types, true) { - this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value; - this->offset = op.offset_expression ? 
DConstants::INVALID_INDEX : op.offset_value; + PhysicalLimit::SetInitialLimits(op.limit_val, op.offset_val, limit, offset); } idx_t current_offset; - idx_t limit; - idx_t offset; + optional_idx limit; + optional_idx offset; BatchedDataCollection data; }; @@ -54,38 +51,56 @@ unique_ptr PhysicalLimit::GetLocalSinkState(ExecutionContext &co return make_uniq(context.client, *this); } -bool PhysicalLimit::ComputeOffset(ExecutionContext &context, DataChunk &input, idx_t &limit, idx_t &offset, - idx_t current_offset, idx_t &max_element, Expression *limit_expression, - Expression *offset_expression) { - if (limit != DConstants::INVALID_INDEX && offset != DConstants::INVALID_INDEX) { - max_element = limit + offset; - if ((limit == 0 || current_offset >= max_element) && !(limit_expression || offset_expression)) { - return false; - } +void PhysicalLimit::SetInitialLimits(const BoundLimitNode &limit_val, const BoundLimitNode &offset_val, + optional_idx &limit, optional_idx &offset) { + switch (limit_val.Type()) { + case LimitNodeType::CONSTANT_VALUE: + limit = limit_val.GetConstantValue(); + break; + case LimitNodeType::UNSET: + limit = MAX_LIMIT_VALUE; + break; + default: + break; } + switch (offset_val.Type()) { + case LimitNodeType::CONSTANT_VALUE: + offset = offset_val.GetConstantValue(); + break; + case LimitNodeType::UNSET: + offset = 0; + break; + default: + break; + } +} - // get the next chunk from the child - if (limit == DConstants::INVALID_INDEX) { - limit = 1ULL << 62ULL; - Value val = GetDelimiter(context, input, limit_expression); +bool PhysicalLimit::ComputeOffset(ExecutionContext &context, DataChunk &input, optional_idx &limit, + optional_idx &offset, idx_t current_offset, idx_t &max_element, + const BoundLimitNode &limit_val, const BoundLimitNode &offset_val) { + if (!limit.IsValid()) { + Value val = GetDelimiter(context, input, limit_val.GetValueExpression()); if (!val.IsNull()) { limit = val.GetValue(); + } else { + limit = MAX_LIMIT_VALUE; } - if (limit > 1ULL << 62ULL) { - throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", limit, 1ULL << 62ULL); + if (limit.GetIndex() > MAX_LIMIT_VALUE) { + throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", limit.GetIndex(), MAX_LIMIT_VALUE); } } - if (offset == DConstants::INVALID_INDEX) { - offset = 0; - Value val = GetDelimiter(context, input, offset_expression); + if (!offset.IsValid()) { + Value val = GetDelimiter(context, input, offset_val.GetValueExpression()); if (!val.IsNull()) { offset = val.GetValue(); + } else { + offset = 0; } - if (offset > 1ULL << 62ULL) { - throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset, 1ULL << 62ULL); + if (offset.GetIndex() > MAX_LIMIT_VALUE) { + throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset.GetIndex(), MAX_LIMIT_VALUE); } } - max_element = limit + offset; + max_element = limit.GetIndex() + offset.GetIndex(); if (limit == 0 || current_offset >= max_element) { return false; } @@ -100,8 +115,7 @@ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, DataChunk &chunk, auto &offset = state.offset; idx_t max_element; - if (!ComputeOffset(context, chunk, limit, offset, state.current_offset, max_element, limit_expression.get(), - offset_expression.get())) { + if (!ComputeOffset(context, chunk, limit, offset, state.current_offset, max_element, limit_val, offset_val)) { return SinkResultType::FINISHED; } auto max_cardinality = max_element - state.current_offset; @@ -121,8 +135,12 @@ SinkCombineResultType 
PhysicalLimit::Combine(ExecutionContext &context, Operator auto &state = input.local_state.Cast(); lock_guard lock(gstate.glock); - gstate.limit = state.limit; - gstate.offset = state.offset; + if (state.limit.IsValid()) { + gstate.limit = state.limit.GetIndex(); + } + if (state.offset.IsValid()) { + gstate.offset = state.offset.GetIndex(); + } gstate.data.Merge(state.data); return SinkCombineResultType::FINISHED; @@ -209,12 +227,12 @@ bool PhysicalLimit::HandleOffset(DataChunk &input, idx_t ¤t_offset, idx_t return true; } -Value PhysicalLimit::GetDelimiter(ExecutionContext &context, DataChunk &input, Expression *expr) { +Value PhysicalLimit::GetDelimiter(ExecutionContext &context, DataChunk &input, const Expression &expr) { DataChunk limit_chunk; - vector types {expr->return_type}; + vector types {expr.return_type}; auto &allocator = Allocator::Get(context.client); limit_chunk.Initialize(allocator, types); - ExpressionExecutor limit_executor(context.client, expr); + ExpressionExecutor limit_executor(context.client, &expr); auto input_size = input.size(); input.SetCardinality(1); limit_executor.Execute(input, limit_chunk); diff --git a/src/execution/operator/helper/physical_limit_percent.cpp b/src/execution/operator/helper/physical_limit_percent.cpp index a65cc219c938..f06b49324a8c 100644 --- a/src/execution/operator/helper/physical_limit_percent.cpp +++ b/src/execution/operator/helper/physical_limit_percent.cpp @@ -7,6 +7,13 @@ namespace duckdb { +PhysicalLimitPercent::PhysicalLimitPercent(vector types, BoundLimitNode limit_val_p, + BoundLimitNode offset_val_p, idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::LIMIT_PERCENT, std::move(types), estimated_cardinality), + limit_val(std::move(limit_val_p)), offset_val(std::move(offset_val_p)) { + D_ASSERT(limit_val.Type() == LimitNodeType::CONSTANT_PERCENTAGE || + limit_val.Type() == LimitNodeType::EXPRESSION_PERCENTAGE); +} //===--------------------------------------------------------------------===// // Sink //===--------------------------------------------------------------------===// @@ -14,28 +21,37 @@ class LimitPercentGlobalState : public GlobalSinkState { public: explicit LimitPercentGlobalState(ClientContext &context, const PhysicalLimitPercent &op) : current_offset(0), data(context, op.GetTypes()) { - if (!op.limit_expression) { - this->limit_percent = op.limit_percent; - is_limit_percent_delimited = true; - } else { - this->limit_percent = 100.0; + switch (op.limit_val.Type()) { + case LimitNodeType::CONSTANT_PERCENTAGE: + this->limit_percent = op.limit_val.GetConstantPercentage(); + this->is_limit_set = true; + break; + case LimitNodeType::EXPRESSION_PERCENTAGE: + this->is_limit_set = false; + break; + default: + throw InternalException("Unsupported type for limit value in PhysicalLimitPercent"); } - - if (!op.offset_expression) { - this->offset = op.offset_value; - is_offset_delimited = true; - } else { + switch (op.offset_val.Type()) { + case LimitNodeType::CONSTANT_VALUE: + this->offset = op.offset_val.GetConstantValue(); + break; + case LimitNodeType::UNSET: this->offset = 0; + break; + case LimitNodeType::EXPRESSION_VALUE: + break; + default: + throw InternalException("Unsupported type for offset value in PhysicalLimitPercent"); } } idx_t current_offset; double limit_percent; - idx_t offset; + optional_idx offset; ColumnDataCollection data; - bool is_limit_percent_delimited = false; - bool is_offset_delimited = false; + bool is_limit_set = false; }; unique_ptr 
PhysicalLimitPercent::GetGlobalSinkState(ClientContext &context) const { @@ -49,28 +65,31 @@ SinkResultType PhysicalLimitPercent::Sink(ExecutionContext &context, DataChunk & auto &offset = state.offset; // get the next chunk from the child - if (!state.is_limit_percent_delimited) { - Value val = PhysicalLimit::GetDelimiter(context, chunk, limit_expression.get()); + if (!state.is_limit_set) { + Value val = PhysicalLimit::GetDelimiter(context, chunk, limit_val.GetPercentageExpression()); if (!val.IsNull()) { limit_percent = val.GetValue(); + } else { + limit_percent = 100.0; } if (limit_percent < 0.0) { throw BinderException("Percentage value(%f) can't be negative", limit_percent); } - state.is_limit_percent_delimited = true; + state.is_limit_set = true; } - if (!state.is_offset_delimited) { - Value val = PhysicalLimit::GetDelimiter(context, chunk, offset_expression.get()); + if (!state.offset.IsValid()) { + Value val = PhysicalLimit::GetDelimiter(context, chunk, offset_val.GetValueExpression()); if (!val.IsNull()) { offset = val.GetValue(); + } else { + offset = 0; } - if (offset > 1ULL << 62ULL) { - throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset, 1ULL << 62ULL); + if (offset.GetIndex() > 1ULL << 62ULL) { + throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset.GetIndex(), 1ULL << 62ULL); } - state.is_offset_delimited = true; } - if (!PhysicalLimit::HandleOffset(chunk, state.current_offset, offset, DConstants::INVALID_INDEX)) { + if (!PhysicalLimit::HandleOffset(chunk, state.current_offset, offset.GetIndex(), NumericLimits::Maximum())) { return SinkResultType::NEED_MORE_INPUT; } @@ -83,15 +102,14 @@ SinkResultType PhysicalLimitPercent::Sink(ExecutionContext &context, DataChunk & //===--------------------------------------------------------------------===// class LimitPercentOperatorState : public GlobalSourceState { public: - explicit LimitPercentOperatorState(const PhysicalLimitPercent &op) - : limit(DConstants::INVALID_INDEX), current_offset(0) { + explicit LimitPercentOperatorState(const PhysicalLimitPercent &op) : current_offset(0) { D_ASSERT(op.sink_state); auto &gstate = op.sink_state->Cast(); gstate.data.InitializeScan(scan_state); } ColumnDataScanState scan_state; - idx_t limit; + optional_idx limit; idx_t current_offset; }; @@ -108,33 +126,33 @@ SourceResultType PhysicalLimitPercent::GetData(ExecutionContext &context, DataCh auto &limit = state.limit; auto ¤t_offset = state.current_offset; - if (gstate.is_limit_percent_delimited && limit == DConstants::INVALID_INDEX) { + if (gstate.is_limit_set && !limit.IsValid()) { idx_t count = gstate.data.Count(); if (count > 0) { - count += offset; + count += offset.GetIndex(); } if (Value::IsNan(percent_limit) || percent_limit < 0 || percent_limit > 100) { throw OutOfRangeException("Limit percent out of range, should be between 0% and 100%"); } - double limit_dbl = percent_limit / 100 * count; - if (limit_dbl > count) { + auto limit_percentage = idx_t(percent_limit / 100.0 * double(count)); + if (limit_percentage > count) { limit = count; } else { - limit = idx_t(limit_dbl); + limit = idx_t(limit_percentage); } if (limit == 0) { return SourceResultType::FINISHED; } } - if (current_offset >= limit) { + if (current_offset >= limit.GetIndex()) { return SourceResultType::FINISHED; } if (!gstate.data.Scan(state.scan_state, chunk)) { return SourceResultType::FINISHED; } - PhysicalLimit::HandleOffset(chunk, current_offset, 0, limit); + PhysicalLimit::HandleOffset(chunk, current_offset, 0, limit.GetIndex()); 
return SourceResultType::HAVE_MORE_OUTPUT; } diff --git a/src/execution/operator/helper/physical_streaming_limit.cpp b/src/execution/operator/helper/physical_streaming_limit.cpp index 5fbd56a3aaa4..881375b95e71 100644 --- a/src/execution/operator/helper/physical_streaming_limit.cpp +++ b/src/execution/operator/helper/physical_streaming_limit.cpp @@ -3,13 +3,10 @@ namespace duckdb { -PhysicalStreamingLimit::PhysicalStreamingLimit(vector types, idx_t limit, idx_t offset, - unique_ptr limit_expression, - unique_ptr offset_expression, idx_t estimated_cardinality, - bool parallel) +PhysicalStreamingLimit::PhysicalStreamingLimit(vector types, BoundLimitNode limit_val_p, + BoundLimitNode offset_val_p, idx_t estimated_cardinality, bool parallel) : PhysicalOperator(PhysicalOperatorType::STREAMING_LIMIT, std::move(types), estimated_cardinality), - limit_value(limit), offset_value(offset), limit_expression(std::move(limit_expression)), - offset_expression(std::move(offset_expression)), parallel(parallel) { + limit_val(std::move(limit_val_p)), offset_val(std::move(offset_val_p)), parallel(parallel) { } //===--------------------------------------------------------------------===// @@ -18,12 +15,11 @@ PhysicalStreamingLimit::PhysicalStreamingLimit(vector types, idx_t class StreamingLimitOperatorState : public OperatorState { public: explicit StreamingLimitOperatorState(const PhysicalStreamingLimit &op) { - this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value; - this->offset = op.offset_expression ? DConstants::INVALID_INDEX : op.offset_value; + PhysicalLimit::SetInitialLimits(op.limit_val, op.offset_val, limit, offset); } - idx_t limit; - idx_t offset; + optional_idx limit; + optional_idx offset; }; class StreamingLimitGlobalState : public GlobalOperatorState { @@ -50,11 +46,11 @@ OperatorResultType PhysicalStreamingLimit::Execute(ExecutionContext &context, Da auto &offset = state.offset; idx_t current_offset = gstate.current_offset.fetch_add(input.size()); idx_t max_element; - if (!PhysicalLimit::ComputeOffset(context, input, limit, offset, current_offset, max_element, - limit_expression.get(), offset_expression.get())) { + if (!PhysicalLimit::ComputeOffset(context, input, limit, offset, current_offset, max_element, limit_val, + offset_val)) { return OperatorResultType::FINISHED; } - if (PhysicalLimit::HandleOffset(input, current_offset, offset, limit)) { + if (PhysicalLimit::HandleOffset(input, current_offset, offset.GetIndex(), limit.GetIndex())) { chunk.Reference(input); } return OperatorResultType::NEED_MORE_INPUT; diff --git a/src/execution/physical_plan/CMakeLists.txt b/src/execution/physical_plan/CMakeLists.txt index 07cf6fde4db0..f9124752f6d7 100644 --- a/src/execution/physical_plan/CMakeLists.txt +++ b/src/execution/physical_plan/CMakeLists.txt @@ -26,7 +26,6 @@ add_library_unity( plan_get.cpp plan_insert.cpp plan_limit.cpp - plan_limit_percent.cpp plan_order.cpp plan_pivot.cpp plan_positional_join.cpp diff --git a/src/execution/physical_plan/plan_limit.cpp b/src/execution/physical_plan/plan_limit.cpp index b241b3892fb4..d963179606ba 100644 --- a/src/execution/physical_plan/plan_limit.cpp +++ b/src/execution/physical_plan/plan_limit.cpp @@ -1,5 +1,6 @@ #include "duckdb/execution/operator/helper/physical_limit.hpp" #include "duckdb/execution/operator/helper/physical_streaming_limit.hpp" +#include "duckdb/execution/operator/helper/physical_limit_percent.hpp" #include "duckdb/execution/physical_plan_generator.hpp" #include "duckdb/main/config.hpp" #include 
"duckdb/planner/operator/logical_limit.hpp" @@ -12,21 +13,30 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalLimit &op) auto plan = CreatePlan(*op.children[0]); unique_ptr limit; - if (!PreserveInsertionOrder(*plan)) { - // use parallel streaming limit if insertion order is not important - limit = make_uniq(op.types, (idx_t)op.limit_val, op.offset_val, std::move(op.limit), - std::move(op.offset), op.estimated_cardinality, true); - } else { - // maintaining insertion order is important - if (UseBatchIndex(*plan)) { - // source supports batch index: use parallel batch limit - limit = make_uniq(op.types, (idx_t)op.limit_val, op.offset_val, std::move(op.limit), - std::move(op.offset), op.estimated_cardinality); + switch (op.limit_val.Type()) { + case LimitNodeType::EXPRESSION_PERCENTAGE: + case LimitNodeType::CONSTANT_PERCENTAGE: + limit = make_uniq(op.types, std::move(op.limit_val), std::move(op.offset_val), + op.estimated_cardinality); + break; + default: + if (!PreserveInsertionOrder(*plan)) { + // use parallel streaming limit if insertion order is not important + limit = make_uniq(op.types, std::move(op.limit_val), std::move(op.offset_val), + op.estimated_cardinality, true); } else { - // source does not support batch index: use a non-parallel streaming limit - limit = make_uniq(op.types, (idx_t)op.limit_val, op.offset_val, std::move(op.limit), - std::move(op.offset), op.estimated_cardinality, false); + // maintaining insertion order is important + if (UseBatchIndex(*plan)) { + // source supports batch index: use parallel batch limit + limit = make_uniq(op.types, std::move(op.limit_val), std::move(op.offset_val), + op.estimated_cardinality); + } else { + // source does not support batch index: use a non-parallel streaming limit + limit = make_uniq(op.types, std::move(op.limit_val), std::move(op.offset_val), + op.estimated_cardinality, false); + } } + break; } limit->children.push_back(std::move(plan)); diff --git a/src/execution/physical_plan/plan_limit_percent.cpp b/src/execution/physical_plan/plan_limit_percent.cpp deleted file mode 100644 index 7caa5b320d02..000000000000 --- a/src/execution/physical_plan/plan_limit_percent.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "duckdb/execution/operator/helper/physical_limit_percent.hpp" -#include "duckdb/execution/physical_plan_generator.hpp" -#include "duckdb/planner/operator/logical_limit_percent.hpp" - -namespace duckdb { - -unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalLimitPercent &op) { - D_ASSERT(op.children.size() == 1); - - auto plan = CreatePlan(*op.children[0]); - - auto limit = make_uniq(op.types, op.limit_percent, op.offset_val, std::move(op.limit), - std::move(op.offset), op.estimated_cardinality); - limit->children.push_back(std::move(plan)); - return std::move(limit); -} - -} // namespace duckdb diff --git a/src/execution/physical_plan_generator.cpp b/src/execution/physical_plan_generator.cpp index 705d522aeffa..bd79b0db592a 100644 --- a/src/execution/physical_plan_generator.cpp +++ b/src/execution/physical_plan_generator.cpp @@ -92,9 +92,6 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalOperator & case LogicalOperatorType::LOGICAL_LIMIT: plan = CreatePlan(op.Cast()); break; - case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: - plan = CreatePlan(op.Cast()); - break; case LogicalOperatorType::LOGICAL_SAMPLE: plan = CreatePlan(op.Cast()); break; diff --git a/src/include/duckdb/common/enum_util.hpp b/src/include/duckdb/common/enum_util.hpp index 66a34ad8b6af..bb289b28e46f 100644 --- 
a/src/include/duckdb/common/enum_util.hpp +++ b/src/include/duckdb/common/enum_util.hpp @@ -154,6 +154,8 @@ enum class JoinType : uint8_t; enum class KeywordCategory : uint8_t; +enum class LimitNodeType : uint8_t; + enum class LoadType : uint8_t; enum class LogicalOperatorType : uint8_t; @@ -492,6 +494,9 @@ const char* EnumUtil::ToChars(JoinType value); template<> const char* EnumUtil::ToChars(KeywordCategory value); +template<> +const char* EnumUtil::ToChars(LimitNodeType value); + template<> const char* EnumUtil::ToChars(LoadType value); @@ -907,6 +912,9 @@ JoinType EnumUtil::FromString(const char *value); template<> KeywordCategory EnumUtil::FromString(const char *value); +template<> +LimitNodeType EnumUtil::FromString(const char *value); + template<> LoadType EnumUtil::FromString(const char *value); diff --git a/src/include/duckdb/common/enums/logical_operator_type.hpp b/src/include/duckdb/common/enums/logical_operator_type.hpp index 9c90ab7d2155..e9e435ae17b8 100644 --- a/src/include/duckdb/common/enums/logical_operator_type.hpp +++ b/src/include/duckdb/common/enums/logical_operator_type.hpp @@ -28,7 +28,6 @@ enum class LogicalOperatorType : uint8_t { LOGICAL_COPY_TO_FILE = 10, LOGICAL_DISTINCT = 11, LOGICAL_SAMPLE = 12, - LOGICAL_LIMIT_PERCENT = 13, LOGICAL_PIVOT = 14, LOGICAL_COPY_DATABASE = 15, diff --git a/src/include/duckdb/execution/operator/helper/physical_limit.hpp b/src/include/duckdb/execution/operator/helper/physical_limit.hpp index 2282dbfd0df6..2734066bb653 100644 --- a/src/include/duckdb/execution/operator/helper/physical_limit.hpp +++ b/src/include/duckdb/execution/operator/helper/physical_limit.hpp @@ -10,6 +10,7 @@ #include "duckdb/execution/physical_operator.hpp" #include "duckdb/planner/expression.hpp" +#include "duckdb/planner/bound_result_modifier.hpp" namespace duckdb { @@ -18,14 +19,14 @@ class PhysicalLimit : public PhysicalOperator { public: static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::LIMIT; + static constexpr const idx_t MAX_LIMIT_VALUE = 1ULL << 62ULL; + public: - PhysicalLimit(vector types, idx_t limit, idx_t offset, unique_ptr limit_expression, - unique_ptr offset_expression, idx_t estimated_cardinality); + PhysicalLimit(vector types, BoundLimitNode limit_val, BoundLimitNode offset_val, + idx_t estimated_cardinality); - idx_t limit_value; - idx_t offset_value; - unique_ptr limit_expression; - unique_ptr offset_expression; + BoundLimitNode limit_val; + BoundLimitNode offset_val; public: bool SinkOrderDependent() const override { @@ -61,11 +62,13 @@ class PhysicalLimit : public PhysicalOperator { } public: - static bool ComputeOffset(ExecutionContext &context, DataChunk &input, idx_t &limit, idx_t &offset, - idx_t current_offset, idx_t &max_element, Expression *limit_expression, - Expression *offset_expression); + static void SetInitialLimits(const BoundLimitNode &limit_val, const BoundLimitNode &offset_val, optional_idx &limit, + optional_idx &offset); + static bool ComputeOffset(ExecutionContext &context, DataChunk &input, optional_idx &limit, optional_idx &offset, + idx_t current_offset, idx_t &max_element, const BoundLimitNode &limit_val, + const BoundLimitNode &offset_val); static bool HandleOffset(DataChunk &input, idx_t ¤t_offset, idx_t offset, idx_t limit); - static Value GetDelimiter(ExecutionContext &context, DataChunk &input, Expression *expr); + static Value GetDelimiter(ExecutionContext &context, DataChunk &input, const Expression &expr); }; } // namespace duckdb diff --git 
a/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp b/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp index 93c04ed87d85..723822ec1754 100644 --- a/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +++ b/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp @@ -10,6 +10,7 @@ #include "duckdb/execution/physical_operator.hpp" #include "duckdb/planner/expression.hpp" +#include "duckdb/planner/bound_result_modifier.hpp" namespace duckdb { @@ -19,18 +20,11 @@ class PhysicalLimitPercent : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::LIMIT_PERCENT; public: - PhysicalLimitPercent(vector types, double limit_percent, idx_t offset, - unique_ptr limit_expression, unique_ptr offset_expression, - idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::LIMIT_PERCENT, std::move(types), estimated_cardinality), - limit_percent(limit_percent), offset_value(offset), limit_expression(std::move(limit_expression)), - offset_expression(std::move(offset_expression)) { - } + PhysicalLimitPercent(vector types, BoundLimitNode limit_val_p, BoundLimitNode offset_val_p, + idx_t estimated_cardinality); - double limit_percent; - idx_t offset_value; - unique_ptr limit_expression; - unique_ptr offset_expression; + BoundLimitNode limit_val; + BoundLimitNode offset_val; public: bool SinkOrderDependent() const override { diff --git a/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp b/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp index a12b4e247dc1..22cdc00d88ec 100644 --- a/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp +++ b/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp @@ -10,6 +10,7 @@ #include "duckdb/execution/physical_operator.hpp" #include "duckdb/planner/expression.hpp" +#include "duckdb/planner/bound_result_modifier.hpp" namespace duckdb { @@ -18,14 +19,11 @@ class PhysicalStreamingLimit : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::STREAMING_LIMIT; public: - PhysicalStreamingLimit(vector types, idx_t limit, idx_t offset, - unique_ptr limit_expression, unique_ptr offset_expression, + PhysicalStreamingLimit(vector types, BoundLimitNode limit_val_p, BoundLimitNode offset_val_p, idx_t estimated_cardinality, bool parallel); - idx_t limit_value; - idx_t offset_value; - unique_ptr limit_expression; - unique_ptr offset_expression; + BoundLimitNode limit_val; + BoundLimitNode offset_val; bool parallel; public: diff --git a/src/include/duckdb/execution/physical_plan_generator.hpp b/src/include/duckdb/execution/physical_plan_generator.hpp index ab6198104b15..cd43ba55f034 100644 --- a/src/include/duckdb/execution/physical_plan_generator.hpp +++ b/src/include/duckdb/execution/physical_plan_generator.hpp @@ -12,7 +12,6 @@ #include "duckdb/execution/physical_operator.hpp" #include "duckdb/planner/logical_operator.hpp" #include "duckdb/planner/logical_tokens.hpp" -#include "duckdb/planner/operator/logical_limit_percent.hpp" #include "duckdb/planner/joinside.hpp" #include "duckdb/catalog/dependency_list.hpp" #include "duckdb/common/unordered_map.hpp" @@ -71,7 +70,6 @@ class PhysicalPlanGenerator { unique_ptr CreatePlan(LogicalFilter &op); unique_ptr CreatePlan(LogicalGet &op); unique_ptr CreatePlan(LogicalLimit &op); - unique_ptr CreatePlan(LogicalLimitPercent &op); unique_ptr CreatePlan(LogicalOrder &op); unique_ptr 
CreatePlan(LogicalTopN &op); unique_ptr CreatePlan(LogicalPositionalJoin &op); diff --git a/src/include/duckdb/planner/binder.hpp b/src/include/duckdb/planner/binder.hpp index 89120cb1da1b..d7b750bf20d3 100644 --- a/src/include/duckdb/planner/binder.hpp +++ b/src/include/duckdb/planner/binder.hpp @@ -49,6 +49,7 @@ struct BoundCreateFunctionInfo; struct CommonTableExpressionInfo; struct BoundParameterMap; struct BoundPragmaInfo; +struct BoundLimitNode; struct PivotColumnEntry; struct UnpivotEntry; @@ -218,9 +219,8 @@ class Binder : public std::enable_shared_from_this { //! Bind the default values of the columns of a table void BindDefaultValues(const ColumnList &columns, vector> &bound_defaults); //! Bind a limit value (LIMIT or OFFSET) - unique_ptr BindDelimiter(ClientContext &context, OrderBinder &order_binder, - unique_ptr delimiter, const LogicalType &type, - Value &delimiter_value); + BoundLimitNode BindLimitValue(OrderBinder &order_binder, unique_ptr limit_val, bool is_percentage, + bool is_offset); //! Move correlated expressions from the child binder to this binder void MoveCorrelatedExpressions(Binder &other); diff --git a/src/include/duckdb/planner/bound_result_modifier.hpp b/src/include/duckdb/planner/bound_result_modifier.hpp index 31d3d612beb4..6d2b4935f6f2 100644 --- a/src/include/duckdb/planner/bound_result_modifier.hpp +++ b/src/include/duckdb/planner/bound_result_modifier.hpp @@ -65,6 +65,63 @@ struct BoundOrderByNode { static BoundOrderByNode Deserialize(Deserializer &deserializer); }; +enum class LimitNodeType : uint8_t { + UNSET = 0, + CONSTANT_VALUE = 1, + CONSTANT_PERCENTAGE = 2, + EXPRESSION_VALUE = 3, + EXPRESSION_PERCENTAGE = 4 +}; + +struct BoundLimitNode { +public: + BoundLimitNode(); + BoundLimitNode(LimitNodeType type, idx_t constant_integer, double constant_percentage, + unique_ptr expression); + +public: + static BoundLimitNode ConstantValue(int64_t value); + static BoundLimitNode ConstantPercentage(double percentage); + static BoundLimitNode ExpressionValue(unique_ptr expression); + static BoundLimitNode ExpressionPercentage(unique_ptr expression); + + LimitNodeType Type() const { + return type; + } + + //! Returns the constant value, only valid if Type() == CONSTANT_VALUE + idx_t GetConstantValue() const; + //! Returns the constant percentage, only valid if Type() == CONSTANT_PERCENTAGE + double GetConstantPercentage() const; + //! Returns the constant percentage, only valid if Type() == EXPRESSION_VALUE + const Expression &GetValueExpression() const; + //! Returns the constant percentage, only valid if Type() == EXPRESSION_PERCENTAGE + const Expression &GetPercentageExpression() const; + + //! Returns a pointer to the expression - should only be used for limit-agnostic optimizations. + //! Prefer using the methods above in other scenarios. + unique_ptr &GetExpression() { + return expression; + } + + void Serialize(Serializer &serializer) const; + static BoundLimitNode Deserialize(Deserializer &deserializer); + +private: + LimitNodeType type = LimitNodeType::UNSET; + //! Integer value, if value is a constant non-percentage + idx_t constant_integer = 0; + //! Percentage value, if value is a constant percentage + double constant_percentage = -1; + //! 
Expression in case node is not constant + unique_ptr expression; + +private: + explicit BoundLimitNode(int64_t constant_value); + explicit BoundLimitNode(double percentage_value); + explicit BoundLimitNode(unique_ptr expression, bool is_percentage); +}; + class BoundLimitModifier : public BoundResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::LIMIT_MODIFIER; @@ -73,13 +130,9 @@ class BoundLimitModifier : public BoundResultModifier { BoundLimitModifier(); //! LIMIT - int64_t limit_val = NumericLimits::Maximum(); + BoundLimitNode limit_val; //! OFFSET - int64_t offset_val = 0; - //! Expression in case limit is not constant - unique_ptr limit; - //! Expression in case limit is not constant - unique_ptr offset; + BoundLimitNode offset_val; }; class BoundOrderModifier : public BoundResultModifier { @@ -119,21 +172,4 @@ class BoundDistinctModifier : public BoundResultModifier { vector> target_distincts; }; -class BoundLimitPercentModifier : public BoundResultModifier { -public: - static constexpr const ResultModifierType TYPE = ResultModifierType::LIMIT_PERCENT_MODIFIER; - -public: - BoundLimitPercentModifier(); - - //! LIMIT % - double limit_percent = 100.0; - //! OFFSET - int64_t offset_val = 0; - //! Expression in case limit is not constant - unique_ptr limit; - //! Expression in case limit is not constant - unique_ptr offset; -}; - } // namespace duckdb diff --git a/src/include/duckdb/planner/operator/list.hpp b/src/include/duckdb/planner/operator/list.hpp index c0dfd0c8aa54..199f0a65234f 100644 --- a/src/include/duckdb/planner/operator/list.hpp +++ b/src/include/duckdb/planner/operator/list.hpp @@ -25,7 +25,6 @@ #include "duckdb/planner/operator/logical_insert.hpp" #include "duckdb/planner/operator/logical_join.hpp" #include "duckdb/planner/operator/logical_limit.hpp" -#include "duckdb/planner/operator/logical_limit_percent.hpp" #include "duckdb/planner/operator/logical_materialized_cte.hpp" #include "duckdb/planner/operator/logical_order.hpp" #include "duckdb/planner/operator/logical_pivot.hpp" diff --git a/src/include/duckdb/planner/operator/logical_limit.hpp b/src/include/duckdb/planner/operator/logical_limit.hpp index 1bec099d0ebc..a8becefec7ca 100644 --- a/src/include/duckdb/planner/operator/logical_limit.hpp +++ b/src/include/duckdb/planner/operator/logical_limit.hpp @@ -9,6 +9,7 @@ #pragma once #include "duckdb/planner/logical_operator.hpp" +#include "duckdb/planner/bound_result_modifier.hpp" namespace duckdb { @@ -18,15 +19,10 @@ class LogicalLimit : public LogicalOperator { static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_LIMIT; public: - LogicalLimit(int64_t limit_val, int64_t offset_val, unique_ptr limit, unique_ptr offset); - - //! Limit and offset values in case they are constants, used in optimizations. - int64_t limit_val; - int64_t offset_val; - //! The maximum amount of elements to emit - unique_ptr limit; - //! 
The offset from the start to begin emitting elements - unique_ptr offset; + LogicalLimit(BoundLimitNode limit_val, BoundLimitNode offset_val); + + BoundLimitNode limit_val; + BoundLimitNode offset_val; public: vector GetColumnBindings() override; diff --git a/src/include/duckdb/planner/operator/logical_limit_percent.hpp b/src/include/duckdb/planner/operator/logical_limit_percent.hpp deleted file mode 100644 index da2c67105dd0..000000000000 --- a/src/include/duckdb/planner/operator/logical_limit_percent.hpp +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/operator/logical_limit_percent.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/planner/logical_operator.hpp" - -namespace duckdb { - -//! LogicalLimitPercent represents a LIMIT PERCENT clause -class LogicalLimitPercent : public LogicalOperator { -public: - static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_LIMIT_PERCENT; - -public: - LogicalLimitPercent(double limit_percent, int64_t offset_val, unique_ptr limit, - unique_ptr offset) - : LogicalOperator(LogicalOperatorType::LOGICAL_LIMIT_PERCENT), limit_percent(limit_percent), - offset_val(offset_val), limit(std::move(limit)), offset(std::move(offset)) { - } - - //! Limit percent and offset values in case they are constants, used in optimizations. - double limit_percent; - int64_t offset_val; - //! The maximum amount of elements to emit - unique_ptr limit; - //! The offset from the start to begin emitting elements - unique_ptr offset; - -public: - vector GetColumnBindings() override { - return children[0]->GetColumnBindings(); - } - - void Serialize(Serializer &serializer) const override; - static unique_ptr Deserialize(Deserializer &deserializer); - idx_t EstimateCardinality(ClientContext &context) override; - -protected: - void ResolveTypes() override { - types = children[0]->types; - } -}; -} // namespace duckdb diff --git a/src/include/duckdb/storage/serialization/logical_operator.json b/src/include/duckdb/storage/serialization/logical_operator.json index 66be2b3a2a56..4fc84183599c 100644 --- a/src/include/duckdb/storage/serialization/logical_operator.json +++ b/src/include/duckdb/storage/serialization/logical_operator.json @@ -143,25 +143,15 @@ { "id": 200, "name": "limit_val", - "type": "int64_t" + "type": "BoundLimitNode" }, { "id": 201, "name": "offset_val", - "type": "int64_t" - }, - { - "id": 202, - "name": "limit", - "type": "Expression*" - }, - { - "id": 203, - "name": "offset", - "type": "Expression*" + "type": "BoundLimitNode" } ], - "constructor": ["limit_val", "offset_val", "limit", "offset"] + "constructor": ["limit_val", "offset_val"] }, { "class": "LogicalOrder", @@ -239,34 +229,6 @@ } ] }, - { - "class": "LogicalLimitPercent", - "base": "LogicalOperator", - "enum": "LOGICAL_LIMIT_PERCENT", - "members": [ - { - "id": 200, - "name": "limit_percent", - "type": "double" - }, - { - "id": 201, - "name": "offset_val", - "type": "int64_t" - }, - { - "id": 202, - "name": "limit", - "type": "Expression*" - }, - { - "id": 203, - "name": "offset", - "type": "Expression*" - } - ], - "constructor": ["limit_percent", "offset_val", "limit", "offset"] - }, { "class": "LogicalColumnDataGet", "base": "LogicalOperator", diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 24f263f4e707..9769cc96f458 100644 --- 
a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -109,6 +109,33 @@ "pointer_type": "none", "constructor": ["type", "null_order", "expression"] }, + { + "class": "BoundLimitNode", + "members": [ + { + "id": 100, + "name": "type", + "type": "LimitNodeType" + }, + { + "id": 101, + "name": "constant_integer", + "type": "idx_t" + }, + { + "id": 102, + "name": "constant_percentage", + "type": "double" + }, + { + "id": 103, + "name": "expression", + "type": "Expression*" + } + ], + "pointer_type": "none", + "constructor": ["type", "constant_integer", "constant_percentage", "expression"] + }, { "class": "CaseCheck", "includes": [ diff --git a/src/optimizer/pushdown/pushdown_limit.cpp b/src/optimizer/pushdown/pushdown_limit.cpp index c2c2d0b00d34..10487bbdcb71 100644 --- a/src/optimizer/pushdown/pushdown_limit.cpp +++ b/src/optimizer/pushdown/pushdown_limit.cpp @@ -9,7 +9,7 @@ namespace duckdb { unique_ptr FilterPushdown::PushdownLimit(unique_ptr op) { auto &limit = op->Cast(); - if (!limit.limit && limit.limit_val == 0) { + if (limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE && limit.limit_val.GetConstantValue() == 0) { return make_uniq(std::move(op)); } diff --git a/src/optimizer/statistics/operator/propagate_join.cpp b/src/optimizer/statistics/operator/propagate_join.cpp index 5fb8092cd827..093a37905270 100644 --- a/src/optimizer/statistics/operator/propagate_join.cpp +++ b/src/optimizer/statistics/operator/propagate_join.cpp @@ -103,7 +103,7 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq // then the whole result should be empty. // TODO: write better CE logic for limits so that we can just look at // join.children[1].estimated_cardinality. - auto limit = make_uniq(1, 0, nullptr, nullptr); + auto limit = make_uniq(BoundLimitNode::ConstantValue(1), BoundLimitNode()); limit->AddChild(std::move(join.children[1])); auto cross_product = LogicalCrossProduct::Create(std::move(join.children[0]), std::move(limit)); *node_ptr = std::move(cross_product); diff --git a/src/optimizer/statistics/operator/propagate_limit.cpp b/src/optimizer/statistics/operator/propagate_limit.cpp index 17855bfc88a2..390e24aba15c 100644 --- a/src/optimizer/statistics/operator/propagate_limit.cpp +++ b/src/optimizer/statistics/operator/propagate_limit.cpp @@ -8,7 +8,11 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalLimi // propagate statistics in the child node PropagateStatistics(limit.children[0]); // return the node stats, with as expected cardinality the amount specified in the limit - return make_uniq(limit.limit_val, limit.limit_val); + if (limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE) { + auto constant_limit = limit.limit_val.GetConstantValue(); + return make_uniq(constant_limit, constant_limit); + } + return nullptr; } } // namespace duckdb diff --git a/src/optimizer/topn_optimizer.cpp b/src/optimizer/topn_optimizer.cpp index 4e7b59ad7a05..ca64e5700704 100644 --- a/src/optimizer/topn_optimizer.cpp +++ b/src/optimizer/topn_optimizer.cpp @@ -12,18 +12,15 @@ bool TopN::CanOptimize(LogicalOperator &op) { op.children[0]->type == LogicalOperatorType::LOGICAL_ORDER_BY) { auto &limit = op.Cast(); - // When there are some expressions in the limit operator, - // we shouldn't use this optimizations. Because of the expressions - // will be lost when it convert to TopN operator. 
- if (limit.limit || limit.offset) { + if (limit.limit_val.Type() != LimitNodeType::CONSTANT_VALUE) { + // we need LIMIT to be present AND be a constant value for us to be able to use Top-N return false; } - - // This optimization doesn't apply when OFFSET is present without LIMIT - // Or if offset is not constant - if (limit.limit_val != NumericLimits::Maximum() || limit.offset) { - return true; + if (limit.offset_val.Type() == LimitNodeType::EXPRESSION_VALUE) { + // we need offset to be either not set (i.e. limit without offset) OR have offset be + return false; } + return true; } return false; } @@ -32,8 +29,12 @@ unique_ptr TopN::Optimize(unique_ptr op) { if (CanOptimize(*op)) { auto &limit = op->Cast(); auto &order_by = (op->children[0])->Cast(); - - auto topn = make_uniq(std::move(order_by.orders), limit.limit_val, limit.offset_val); + auto limit_val = int64_t(limit.limit_val.GetConstantValue()); + int64_t offset_val = 0; + if (limit.offset_val.Type() == LimitNodeType::CONSTANT_VALUE) { + offset_val = limit.offset_val.GetConstantValue(); + } + auto topn = make_uniq(std::move(order_by.orders), limit_val, offset_val); topn->AddChild(std::move(order_by.children[0])); op = std::move(topn); } else { diff --git a/src/planner/binder/query_node/bind_select_node.cpp b/src/planner/binder/query_node/bind_select_node.cpp index 41931f5390b3..03e221511862 100644 --- a/src/planner/binder/query_node/bind_select_node.cpp +++ b/src/planner/binder/query_node/bind_select_node.cpp @@ -40,75 +40,83 @@ unique_ptr Binder::BindOrderExpression(OrderBinder &order_binder, un return bound_expr; } -unique_ptr Binder::BindDelimiter(ClientContext &context, OrderBinder &order_binder, - unique_ptr delimiter, const LogicalType &type, - Value &delimiter_value) { +BoundLimitNode Binder::BindLimitValue(OrderBinder &order_binder, unique_ptr limit_val, + bool is_percentage, bool is_offset) { auto new_binder = Binder::CreateBinder(context, this, true); - if (delimiter->HasSubquery()) { + if (limit_val->HasSubquery()) { if (!order_binder.HasExtraList()) { throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation"); } - return order_binder.CreateExtraReference(std::move(delimiter)); + auto bound_limit = order_binder.CreateExtraReference(std::move(limit_val)); + if (is_percentage) { + return BoundLimitNode::ExpressionPercentage(std::move(bound_limit)); + } else { + return BoundLimitNode::ExpressionValue(std::move(bound_limit)); + } } ExpressionBinder expr_binder(*new_binder, context); - expr_binder.target_type = type; - auto expr = expr_binder.Bind(delimiter); + auto target_type = is_percentage ? LogicalType::DOUBLE : LogicalType::BIGINT; + ; + expr_binder.target_type = target_type; + auto expr = expr_binder.Bind(limit_val); if (expr->IsFoldable()) { //! this is a constant - delimiter_value = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, type); - return nullptr; + auto val = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, target_type); + if (is_percentage) { + D_ASSERT(!is_offset); + double percentage_val; + if (val.IsNull()) { + percentage_val = 100.0; + } else { + percentage_val = val.GetValue(); + } + if (Value::IsNan(percentage_val) || percentage_val < 0 || percentage_val > 100) { + throw OutOfRangeException("Limit percent out of range, should be between 0% and 100%"); + } + return BoundLimitNode::ConstantPercentage(percentage_val); + } else { + int64_t constant_val; + if (val.IsNull()) { + constant_val = is_offset ? 
0 : NumericLimits::Maximum(); + } else { + constant_val = val.GetValue(); + } + if (constant_val < 0) { + throw BinderException(expr->query_location, "LIMIT/OFFSET cannot be negative"); + } + return BoundLimitNode::ConstantValue(constant_val); + } } if (!new_binder->correlated_columns.empty()) { throw BinderException("Correlated columns not supported in LIMIT/OFFSET"); } // move any correlated columns to this binder MoveCorrelatedExpressions(*new_binder); - return expr; + if (is_percentage) { + return BoundLimitNode::ExpressionPercentage(std::move(expr)); + } else { + return BoundLimitNode::ExpressionValue(std::move(expr)); + } } duckdb::unique_ptr Binder::BindLimit(OrderBinder &order_binder, LimitModifier &limit_mod) { auto result = make_uniq(); if (limit_mod.limit) { - Value val; - result->limit = BindDelimiter(context, order_binder, std::move(limit_mod.limit), LogicalType::BIGINT, val); - if (!result->limit) { - result->limit_val = val.IsNull() ? NumericLimits::Maximum() : val.GetValue(); - if (result->limit_val < 0) { - throw BinderException("LIMIT cannot be negative"); - } - } + result->limit_val = BindLimitValue(order_binder, std::move(limit_mod.limit), false, false); } if (limit_mod.offset) { - Value val; - result->offset = BindDelimiter(context, order_binder, std::move(limit_mod.offset), LogicalType::BIGINT, val); - if (!result->offset) { - result->offset_val = val.IsNull() ? 0 : val.GetValue(); - if (result->offset_val < 0) { - throw BinderException("OFFSET cannot be negative"); - } - } + result->offset_val = BindLimitValue(order_binder, std::move(limit_mod.offset), false, true); } return std::move(result); } unique_ptr Binder::BindLimitPercent(OrderBinder &order_binder, LimitPercentModifier &limit_mod) { - auto result = make_uniq(); + auto result = make_uniq(); if (limit_mod.limit) { - Value val; - result->limit = BindDelimiter(context, order_binder, std::move(limit_mod.limit), LogicalType::DOUBLE, val); - if (!result->limit) { - result->limit_percent = val.IsNull() ? 100 : val.GetValue(); - if (result->limit_percent < 0.0) { - throw InvalidInputException("Limit percentage can't be negative value"); - } - } + result->limit_val = BindLimitValue(order_binder, std::move(limit_mod.limit), true, false); } if (limit_mod.offset) { - Value val; - result->offset = BindDelimiter(context, order_binder, std::move(limit_mod.offset), LogicalType::BIGINT, val); - if (!result->offset) { - result->offset_val = val.IsNull() ? 
0 : val.GetValue();
-	}
+		result->offset_val = BindLimitValue(order_binder, std::move(limit_mod.offset), false, true);
 	}
 	return std::move(result);
 }
@@ -266,14 +274,8 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector
 	}
 	case ResultModifierType::LIMIT_MODIFIER: {
 		auto &limit = bound_mod->Cast<BoundLimitModifier>();
-		AssignReturnType(limit.limit, sql_types);
-		AssignReturnType(limit.offset, sql_types);
-		break;
-	}
-	case ResultModifierType::LIMIT_PERCENT_MODIFIER: {
-		auto &limit = bound_mod->Cast<BoundLimitPercentModifier>();
-		AssignReturnType(limit.limit, sql_types);
-		AssignReturnType(limit.offset, sql_types);
+		AssignReturnType(limit.limit_val.GetExpression(), sql_types);
+		AssignReturnType(limit.offset_val.GetExpression(), sql_types);
 		break;
 	}
 	case ResultModifierType::ORDER_MODIFIER: {
diff --git a/src/planner/binder/query_node/plan_query_node.cpp b/src/planner/binder/query_node/plan_query_node.cpp
index ff29a54829c9..97678d2271e0 100644
--- a/src/planner/binder/query_node/plan_query_node.cpp
+++ b/src/planner/binder/query_node/plan_query_node.cpp
@@ -2,7 +2,6 @@
 #include "duckdb/planner/binder.hpp"
 #include "duckdb/planner/operator/logical_distinct.hpp"
 #include "duckdb/planner/operator/logical_limit.hpp"
-#include "duckdb/planner/operator/logical_limit_percent.hpp"
 #include "duckdb/planner/operator/logical_order.hpp"
 #include "duckdb/planner/bound_result_modifier.hpp"
 
@@ -38,16 +37,7 @@ unique_ptr<LogicalOperator> Binder::VisitQueryNode(BoundQueryNode &node, unique_
 	}
 	case ResultModifierType::LIMIT_MODIFIER: {
 		auto &bound = mod->Cast<BoundLimitModifier>();
-		auto limit = make_uniq<LogicalLimit>(bound.limit_val, bound.offset_val, std::move(bound.limit),
-		                                     std::move(bound.offset));
-		limit->AddChild(std::move(root));
-		root = std::move(limit);
-		break;
-	}
-	case ResultModifierType::LIMIT_PERCENT_MODIFIER: {
-		auto &bound = mod->Cast<BoundLimitPercentModifier>();
-		auto limit = make_uniq<LogicalLimitPercent>(bound.limit_percent, bound.offset_val, std::move(bound.limit),
-		                                            std::move(bound.offset));
+		auto limit = make_uniq<LogicalLimit>(std::move(bound.limit_val), std::move(bound.offset_val));
 		limit->AddChild(std::move(root));
 		root = std::move(limit);
 		break;
diff --git a/src/planner/binder/query_node/plan_subquery.cpp b/src/planner/binder/query_node/plan_subquery.cpp
index d2ad056bb3fb..7370457a6031 100644
--- a/src/planner/binder/query_node/plan_subquery.cpp
+++ b/src/planner/binder/query_node/plan_subquery.cpp
@@ -29,7 +29,7 @@ static unique_ptr<LogicalOperator> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
 	case SubqueryType::EXISTS: {
 		// uncorrelated EXISTS
 		// we only care about existence, hence we push a LIMIT 1 operator
-		auto limit = make_uniq<LogicalLimit>(1, 0, nullptr, nullptr);
+		auto limit = make_uniq<LogicalLimit>(BoundLimitNode::ConstantValue(1), BoundLimitNode());
 		limit->AddChild(std::move(plan));
 		plan = std::move(limit);
 
@@ -79,7 +79,7 @@ static unique_ptr<LogicalOperator> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
 
 		// in the uncorrelated case we are only interested in the first result of the query
 		// hence we simply push a LIMIT 1 to get the first row of the subquery
-		auto limit = make_uniq<LogicalLimit>(1, 0, nullptr, nullptr);
+		auto limit = make_uniq<LogicalLimit>(BoundLimitNode::ConstantValue(1), BoundLimitNode());
 		limit->AddChild(std::move(plan));
 		plan = std::move(limit);
 
diff --git a/src/planner/bound_result_modifier.cpp b/src/planner/bound_result_modifier.cpp
index dea41088a680..9a1728ed71d0 100644
--- a/src/planner/bound_result_modifier.cpp
+++ b/src/planner/bound_result_modifier.cpp
@@ -116,6 +116,73 @@ bool BoundOrderModifier::Simplify(const vector<unique_ptr<Expression>> &groups)
 	return orders.empty();
 }
 
+BoundLimitNode::BoundLimitNode(LimitNodeType type, idx_t constant_integer, double constant_percentage,
+                               unique_ptr<Expression> expression_p)
+    : type(type), constant_integer(constant_integer), constant_percentage(constant_percentage),
+      expression(std::move(expression_p)) {
+}
+
+BoundLimitNode::BoundLimitNode() : type(LimitNodeType::UNSET) {
+}
+
+BoundLimitNode::BoundLimitNode(int64_t constant_value)
+    : type(LimitNodeType::CONSTANT_VALUE), constant_integer(constant_value) {
+}
+
+BoundLimitNode::BoundLimitNode(double percentage_value)
+    : type(LimitNodeType::CONSTANT_PERCENTAGE), constant_percentage(percentage_value) {
+}
+
+BoundLimitNode::BoundLimitNode(unique_ptr<Expression> expression_p, bool is_percentage)
+    : type(is_percentage ? LimitNodeType::EXPRESSION_PERCENTAGE : LimitNodeType::EXPRESSION_VALUE),
+      expression(std::move(expression_p)) {
+}
+
+BoundLimitNode BoundLimitNode::ConstantValue(int64_t value) {
+	return BoundLimitNode(value);
+}
+
+BoundLimitNode BoundLimitNode::ConstantPercentage(double percentage) {
+	return BoundLimitNode(percentage);
+}
+
+BoundLimitNode BoundLimitNode::ExpressionValue(unique_ptr<Expression> expression) {
+	return BoundLimitNode(std::move(expression), false);
+}
+
+BoundLimitNode BoundLimitNode::ExpressionPercentage(unique_ptr<Expression> expression) {
+	return BoundLimitNode(std::move(expression), true);
+}
+
+idx_t BoundLimitNode::GetConstantValue() const {
+	if (Type() != LimitNodeType::CONSTANT_VALUE) {
+		throw InternalException("BoundLimitNode::GetConstantValue called but limit is not a constant value");
+	}
+	return constant_integer;
+}
+
+double BoundLimitNode::GetConstantPercentage() const {
+	if (Type() != LimitNodeType::CONSTANT_PERCENTAGE) {
+		throw InternalException("BoundLimitNode::GetConstantPercentage called but limit is not a constant percentage");
+	}
+	return constant_percentage;
+}
+
+const Expression &BoundLimitNode::GetValueExpression() const {
+	if (Type() != LimitNodeType::EXPRESSION_VALUE) {
+		throw InternalException("BoundLimitNode::GetValueExpression called but limit is not an expression value");
+	}
+	return *expression;
+}
+
+const Expression &BoundLimitNode::GetPercentageExpression() const {
+	if (Type() != LimitNodeType::EXPRESSION_PERCENTAGE) {
+		throw InternalException(
+		    "BoundLimitNode::GetPercentageExpression called but limit is not an expression percentage");
+	}
+	return *expression;
+}
+
 BoundLimitModifier::BoundLimitModifier() : BoundResultModifier(ResultModifierType::LIMIT_MODIFIER) {
 }
 
@@ -125,8 +192,4 @@ BoundOrderModifier::BoundOrderModifier() : BoundResultModifier(ResultModifierTyp
 BoundDistinctModifier::BoundDistinctModifier() : BoundResultModifier(ResultModifierType::DISTINCT_MODIFIER) {
 }
 
-BoundLimitPercentModifier::BoundLimitPercentModifier()
-    : BoundResultModifier(ResultModifierType::LIMIT_PERCENT_MODIFIER) {
-}
-
 } // namespace duckdb
diff --git a/src/planner/logical_operator_visitor.cpp b/src/planner/logical_operator_visitor.cpp
index e692473f1fac..367fb5f20438 100644
--- a/src/planner/logical_operator_visitor.cpp
+++ b/src/planner/logical_operator_visitor.cpp
@@ -87,21 +87,11 @@ void LogicalOperatorVisitor::EnumerateExpressions(LogicalOperator &op,
 	}
 	case LogicalOperatorType::LOGICAL_LIMIT: {
 		auto &limit = op.Cast<LogicalLimit>();
-		if (limit.limit) {
-			callback(&limit.limit);
+		if (limit.limit_val.GetExpression()) {
+			callback(&limit.limit_val.GetExpression());
 		}
-		if (limit.offset) {
-			callback(&limit.offset);
-		}
-		break;
-	}
-	case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: {
-		auto &limit = op.Cast<LogicalLimitPercent>();
-		if (limit.limit) {
-			callback(&limit.limit);
-		}
-		if (limit.offset) {
-			callback(&limit.offset);
+		if (limit.offset_val.GetExpression()) {
+			callback(&limit.offset_val.GetExpression());
 		}
 		break;
 	}
diff --git a/src/planner/operator/CMakeLists.txt b/src/planner/operator/CMakeLists.txt
index d2350edc3de3..2823f1cee6c7 100644
--- a/src/planner/operator/CMakeLists.txt
+++ b/src/planner/operator/CMakeLists.txt
@@ -25,7 +25,6 @@ add_library_unity(
   logical_insert.cpp
   logical_join.cpp
   logical_limit.cpp
-  logical_limit_percent.cpp
  logical_order.cpp
   logical_pivot.cpp
   logical_positional_join.cpp
diff --git a/src/planner/operator/logical_limit.cpp b/src/planner/operator/logical_limit.cpp
index e413078aa970..332da4ab55e0 100644
--- a/src/planner/operator/logical_limit.cpp
+++ b/src/planner/operator/logical_limit.cpp
@@ -2,10 +2,9 @@
 
 namespace duckdb {
 
-LogicalLimit::LogicalLimit(int64_t limit_val, int64_t offset_val, unique_ptr<Expression> limit,
-                           unique_ptr<Expression> offset)
-    : LogicalOperator(LogicalOperatorType::LOGICAL_LIMIT), limit_val(limit_val), offset_val(offset_val),
-      limit(std::move(limit)), offset(std::move(offset)) {
+LogicalLimit::LogicalLimit(BoundLimitNode limit_val, BoundLimitNode offset_val)
+    : LogicalOperator(LogicalOperatorType::LOGICAL_LIMIT), limit_val(std::move(limit_val)),
+      offset_val(std::move(offset_val)) {
 }
 
 vector<ColumnBinding> LogicalLimit::GetColumnBindings() {
@@ -14,8 +13,17 @@ vector<ColumnBinding> LogicalLimit::GetColumnBindings() {
 
 idx_t LogicalLimit::EstimateCardinality(ClientContext &context) {
 	auto child_cardinality = children[0]->EstimateCardinality(context);
-	if (limit_val >= 0 && idx_t(limit_val) < child_cardinality) {
-		child_cardinality = limit_val;
+	switch (limit_val.Type()) {
+	case LimitNodeType::CONSTANT_VALUE:
+		if (limit_val.GetConstantValue() < child_cardinality) {
+			child_cardinality = limit_val.GetConstantValue();
+		}
+		break;
+	case LimitNodeType::CONSTANT_PERCENTAGE:
+		child_cardinality = idx_t(double(child_cardinality) * limit_val.GetConstantPercentage());
+		break;
+	default:
+		break;
 	}
 	return child_cardinality;
 }
diff --git a/src/planner/operator/logical_limit_percent.cpp b/src/planner/operator/logical_limit_percent.cpp
deleted file mode 100644
index c020ac4b56a9..000000000000
--- a/src/planner/operator/logical_limit_percent.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "duckdb/planner/operator/logical_limit_percent.hpp"
-#include <cmath>
-
-namespace duckdb {
-
-idx_t LogicalLimitPercent::EstimateCardinality(ClientContext &context) {
-	auto child_cardinality = LogicalOperator::EstimateCardinality(context);
-	if ((limit_percent < 0 || limit_percent > 100) || std::isnan(limit_percent)) {
-		return child_cardinality;
-	}
-	return idx_t(child_cardinality * (limit_percent / 100.0));
-}
-
-} // namespace duckdb
diff --git a/src/planner/subquery/flatten_dependent_join.cpp b/src/planner/subquery/flatten_dependent_join.cpp
index 6720e8ad3c15..b5bd370d6e47 100644
--- a/src/planner/subquery/flatten_dependent_join.cpp
+++ b/src/planner/subquery/flatten_dependent_join.cpp
@@ -447,8 +447,26 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 	}
 	case LogicalOperatorType::LOGICAL_LIMIT: {
 		auto &limit = plan->Cast<LogicalLimit>();
-		if (limit.limit || limit.offset) {
-			throw ParserException("Non-constant limit or offset not supported in correlated subquery");
+		switch (limit.limit_val.Type()) {
+		case LimitNodeType::CONSTANT_PERCENTAGE:
+		case LimitNodeType::EXPRESSION_PERCENTAGE:
+			// NOTE: limit percent could be supported in a manner similar to the LIMIT above
+			// but instead of filtering by an exact number of rows, the limit should be expressed as
+			// COUNT computed over the partition multiplied by the percentage
+			throw ParserException("Limit percent operator not supported in correlated subquery");
+		case LimitNodeType::EXPRESSION_VALUE:
+			throw ParserException("Non-constant limit not supported in correlated subquery");
+		default:
+			break;
+		}
+		switch (limit.offset_val.Type()) {
+		case LimitNodeType::EXPRESSION_VALUE:
+			throw ParserException("Non-constant offset not supported in correlated subquery");
+		case LimitNodeType::CONSTANT_PERCENTAGE:
+		case LimitNodeType::EXPRESSION_PERCENTAGE:
+			throw InternalException("Percentage offset in FlattenDependentJoin");
+		default:
+			break;
 		}
 		auto rownum_alias = "limit_rownum";
 		unique_ptr<LogicalOperator> child;
@@ -495,19 +513,34 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 		auto row_num_ref =
 		    make_uniq<BoundColumnRefExpression>(rownum_alias, LogicalType::BIGINT, ColumnBinding(window_index, 0));
-		int64_t upper_bound_limit = NumericLimits<int64_t>::Maximum();
-		TryAddOperator::Operation(limit.offset_val, limit.limit_val, upper_bound_limit);
-		auto upper_bound = make_uniq<BoundConstantExpression>(Value::BIGINT(upper_bound_limit));
-		condition = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_LESSTHANOREQUALTO, row_num_ref->Copy(),
-		                                                 std::move(upper_bound));
+		if (limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE) {
+			auto upper_bound_limit = NumericLimits<int64_t>::Maximum();
+			auto limit_val = int64_t(limit.limit_val.GetConstantValue());
+			if (limit.offset_val.Type() == LimitNodeType::CONSTANT_VALUE) {
+				// both offset and limit specified - upper bound is offset + limit
+				auto offset_val = int64_t(limit.offset_val.GetConstantValue());
+				TryAddOperator::Operation(limit_val, offset_val, upper_bound_limit);
+			} else {
+				// no offset - upper bound is only the limit
+				upper_bound_limit = limit_val;
+			}
+			auto upper_bound = make_uniq<BoundConstantExpression>(Value::BIGINT(upper_bound_limit));
+			condition = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_LESSTHANOREQUALTO,
+			                                                 row_num_ref->Copy(), std::move(upper_bound));
+		}
 		// we only need to add "row_number >= offset + 1" if offset is bigger than 0
-		if (limit.offset_val > 0) {
-			auto lower_bound = make_uniq<BoundConstantExpression>(Value::BIGINT(limit.offset_val));
+		if (limit.offset_val.Type() == LimitNodeType::CONSTANT_VALUE) {
+			auto offset_val = int64_t(limit.offset_val.GetConstantValue());
+			auto lower_bound = make_uniq<BoundConstantExpression>(Value::BIGINT(offset_val));
 			auto lower_comp = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_GREATERTHAN,
 			                                                       row_num_ref->Copy(), std::move(lower_bound));
-			auto conj = make_uniq<BoundConjunctionExpression>(ExpressionType::CONJUNCTION_AND, std::move(lower_comp),
-			                                                  std::move(condition));
-			condition = std::move(conj);
+			if (condition) {
+				auto conj = make_uniq<BoundConjunctionExpression>(ExpressionType::CONJUNCTION_AND,
+				                                                  std::move(lower_comp), std::move(condition));
+				condition = std::move(conj);
+			} else {
+				condition = std::move(lower_comp);
+			}
 		}
 		filter->expressions.push_back(std::move(condition));
 		filter->children.push_back(std::move(window));
@@ -517,12 +550,6 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 		}
 		return std::move(filter);
 	}
-	case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: {
-		// NOTE: limit percent could be supported in a manner similar to the LIMIT above
-		// but instead of filtering by an exact number of rows, the limit should be expressed as
-		// COUNT computed over the partition multiplied by the percentage
-		throw ParserException("Limit percent operator not supported in correlated subquery");
-	}
 	case LogicalOperatorType::LOGICAL_WINDOW: {
 		auto &window = plan->Cast<LogicalWindow>();
 		// push into children
diff --git a/src/storage/serialization/serialize_logical_operator.cpp b/src/storage/serialization/serialize_logical_operator.cpp
index fd061c1dbb80..1c155485c4a9 100644
--- a/src/storage/serialization/serialize_logical_operator.cpp
+++ b/src/storage/serialization/serialize_logical_operator.cpp
@@ -124,9 +124,6 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
 	case LogicalOperatorType::LOGICAL_LIMIT:
 		result = LogicalLimit::Deserialize(deserializer);
 		break;
-	case LogicalOperatorType::LOGICAL_LIMIT_PERCENT:
-		result = LogicalLimitPercent::Deserialize(deserializer);
-		break;
 	case LogicalOperatorType::LOGICAL_LOAD:
 		result = LogicalSimple::Deserialize(deserializer);
 		break;
@@ -506,35 +503,14 @@ unique_ptr<LogicalOperator> LogicalInsert::Deserialize(Deserializer &deserialize
 
 void LogicalLimit::Serialize(Serializer &serializer) const {
 	LogicalOperator::Serialize(serializer);
-	serializer.WritePropertyWithDefault<int64_t>(200, "limit_val", limit_val);
-	serializer.WritePropertyWithDefault<int64_t>(201, "offset_val", offset_val);
-	serializer.WritePropertyWithDefault<unique_ptr<Expression>>(202, "limit", limit);
-	serializer.WritePropertyWithDefault<unique_ptr<Expression>>(203, "offset", offset);
+	serializer.WriteProperty<BoundLimitNode>(200, "limit_val", limit_val);
+	serializer.WriteProperty<BoundLimitNode>(201, "offset_val", offset_val);
 }
 
 unique_ptr<LogicalOperator> LogicalLimit::Deserialize(Deserializer &deserializer) {
-	auto limit_val = deserializer.ReadPropertyWithDefault<int64_t>(200, "limit_val");
-	auto offset_val = deserializer.ReadPropertyWithDefault<int64_t>(201, "offset_val");
-	auto limit = deserializer.ReadPropertyWithDefault<unique_ptr<Expression>>(202, "limit");
-	auto offset = deserializer.ReadPropertyWithDefault<unique_ptr<Expression>>(203, "offset");
-	auto result = duckdb::unique_ptr<LogicalLimit>(new LogicalLimit(limit_val, offset_val, std::move(limit), std::move(offset)));
-	return std::move(result);
-}
-
-void LogicalLimitPercent::Serialize(Serializer &serializer) const {
-	LogicalOperator::Serialize(serializer);
-	serializer.WriteProperty<double>(200, "limit_percent", limit_percent);
-	serializer.WritePropertyWithDefault<int64_t>(201, "offset_val", offset_val);
-	serializer.WritePropertyWithDefault<unique_ptr<Expression>>(202, "limit", limit);
-	serializer.WritePropertyWithDefault<unique_ptr<Expression>>(203, "offset", offset);
-}
-
-unique_ptr<LogicalOperator> LogicalLimitPercent::Deserialize(Deserializer &deserializer) {
-	auto limit_percent = deserializer.ReadProperty<double>(200, "limit_percent");
-	auto offset_val = deserializer.ReadPropertyWithDefault<int64_t>(201, "offset_val");
-	auto limit = deserializer.ReadPropertyWithDefault<unique_ptr<Expression>>(202, "limit");
-	auto offset = deserializer.ReadPropertyWithDefault<unique_ptr<Expression>>(203, "offset");
-	auto result = duckdb::unique_ptr<LogicalLimitPercent>(new LogicalLimitPercent(limit_percent, offset_val, std::move(limit), std::move(offset)));
+	auto limit_val = deserializer.ReadProperty<BoundLimitNode>(200, "limit_val");
+	auto offset_val = deserializer.ReadProperty<BoundLimitNode>(201, "offset_val");
+	auto result = duckdb::unique_ptr<LogicalLimit>(new LogicalLimit(std::move(limit_val), std::move(offset_val)));
 	return std::move(result);
 }
 
diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp
index 3717b5622823..2be3e4e5f002 100644
--- a/src/storage/serialization/serialize_nodes.cpp
+++ b/src/storage/serialization/serialize_nodes.cpp
@@ -45,6 +45,22 @@ BoundCaseCheck BoundCaseCheck::Deserialize(Deserializer &deserializer) {
 	return result;
 }
 
+void BoundLimitNode::Serialize(Serializer &serializer) const {
+	serializer.WriteProperty<LimitNodeType>(100, "type", type);
+	serializer.WritePropertyWithDefault<idx_t>(101, "constant_integer", constant_integer);
+	serializer.WriteProperty<double>(102, "constant_percentage", constant_percentage);
+	serializer.WritePropertyWithDefault<unique_ptr<Expression>>(103, "expression", expression);
+}
+
+BoundLimitNode BoundLimitNode::Deserialize(Deserializer &deserializer) {
+	auto type = deserializer.ReadProperty<LimitNodeType>(100, "type");
+	auto constant_integer = deserializer.ReadPropertyWithDefault<idx_t>(101, "constant_integer");
+	auto constant_percentage = deserializer.ReadProperty<double>(102, "constant_percentage");
+	auto expression = deserializer.ReadPropertyWithDefault<unique_ptr<Expression>>(103, "expression");
+	BoundLimitNode result(type, constant_integer, constant_percentage, std::move(expression));
+	return result;
+}
+
 void BoundOrderByNode::Serialize(Serializer &serializer) const {
 	serializer.WriteProperty<OrderType>(100, "type", type);
 	serializer.WriteProperty<OrderByNullType>(101, "null_order", null_order);
diff --git a/test/api/serialized_plans/serialized_plans.binary b/test/api/serialized_plans/serialized_plans.binary
index 996645e3e423..42a316d52da3 100644
Binary files a/test/api/serialized_plans/serialized_plans.binary and b/test/api/serialized_plans/serialized_plans.binary differ
diff --git a/test/fuzzer/pedro/setop_type_mismatch.test b/test/fuzzer/pedro/setop_type_mismatch.test
index 12ff24ee61a8..3c58498db60d 100644
--- a/test/fuzzer/pedro/setop_type_mismatch.test
+++ b/test/fuzzer/pedro/setop_type_mismatch.test
@@ -12,7 +12,17 @@ statement ok
 INSERT INTO t1 (c1) VALUES (1);
 
 query I
-SELECT ((SELECT 0) UNION (SELECT t1.c0) OFFSET 1) FROM t1;
+SELECT ((SELECT 0) UNION (SELECT t1.c0) OFFSET 2) FROM t1;
+----
+NULL
+
+query I
+SELECT ((SELECT 0) UNION (SELECT t1.c0) ORDER BY 1 NULLS FIRST OFFSET 1) FROM t1;
+----
+0
+
+query I
+SELECT ((SELECT 0) UNION (SELECT t1.c0) ORDER BY 1 NULLS LAST OFFSET 1) FROM t1;
 ----
 NULL
 
diff --git a/test/sql/subquery/scalar/test_correlated_subquery.test b/test/sql/subquery/scalar/test_correlated_subquery.test
index dc8cb558590b..da27f83ad4a3 100644
--- a/test/sql/subquery/scalar/test_correlated_subquery.test
+++ b/test/sql/subquery/scalar/test_correlated_subquery.test
@@ -9,13 +9,10 @@ statement ok
 PRAGMA enable_verification
 
 statement ok
-CREATE TABLE integers(i INTEGER)
+CREATE TABLE integers(i INTEGER);
 
 statement ok
-INSERT INTO integers VALUES (1), (2), (3), (NULL)
-
-# temporary solution until decorrelation of materialized CTEs is implemented
-require noalternativeverify
+INSERT INTO integers VALUES (1), (2), (3), (NULL);
 
 # scalar select with correlation
 query II
@@ -98,6 +95,21 @@ NULL	NULL
 2	3
 3	4
 
+# subquery with OFFSET and without LIMIT
+query I
+select (select val + i from generate_series(1, 2, 1) t(i) offset 1) from (select 42 val) t;
+----
+44
+
+# subquery with OFFSET and without LIMIT with ORDER BY
+query II
+select i, (select i1.i + i + i from generate_series(1, 100, 1) t(i) ORDER BY i DESC OFFSET 99) from integers i1 order by i;
+----
+NULL	NULL
+1	3
+2	4
+3	5
+
 # subquery with ORDER BY
 query II
 SELECT i, (SELECT i+i1.i FROM integers ORDER BY i NULLS LAST LIMIT 1) AS j FROM integers i1 ORDER BY i;