From 55f3a7e8cc045a59fa0f2eef72f2488b9c26ae9c Mon Sep 17 00:00:00 2001 From: Denis Hirn Date: Thu, 2 May 2024 14:11:24 +0200 Subject: [PATCH] Add InsertionOrderPreservingMap for CTEs This commit adds an insertion order preserving map, while keeping the serialization format of a regular map. --- extension/sqlsmith/statement_generator.cpp | 2 +- .../common/insertion_order_preserving_map.hpp | 39 +++++++++++++++++++ .../duckdb/common/serializer/deserializer.hpp | 19 +++++++++ .../serializer/serialization_traits.hpp | 19 +++++++++ .../duckdb/common/serializer/serializer.hpp | 17 ++++++++ src/include/duckdb/parser/query_node.hpp | 5 +-- .../duckdb/storage/serialization/nodes.json | 7 +--- src/parser/query_node.cpp | 14 +++---- .../transform/helpers/transform_cte.cpp | 14 +++---- src/parser/transformer.cpp | 6 +-- src/planner/binder.cpp | 4 +- src/storage/serialization/serialize_nodes.cpp | 6 +-- 12 files changed, 115 insertions(+), 37 deletions(-) create mode 100644 src/include/duckdb/common/insertion_order_preserving_map.hpp diff --git a/extension/sqlsmith/statement_generator.cpp b/extension/sqlsmith/statement_generator.cpp index 1074ceaf8959..d7e9d15124f1 100644 --- a/extension/sqlsmith/statement_generator.cpp +++ b/extension/sqlsmith/statement_generator.cpp @@ -175,7 +175,7 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { for (idx_t i = 0; i < 1 + RandomValue(10); i++) { cte->aliases.push_back(GenerateIdentifier()); } - node.cte_map.map_idx[GenerateTableIdentifier()] = node.cte_map.map.size(); + node.cte_map.map.map_idx[GenerateTableIdentifier()] = node.cte_map.map.size(); node.cte_map.map.push_back(std::move(cte)); } } diff --git a/src/include/duckdb/common/insertion_order_preserving_map.hpp b/src/include/duckdb/common/insertion_order_preserving_map.hpp new file mode 100644 index 000000000000..01084290211c --- /dev/null +++ b/src/include/duckdb/common/insertion_order_preserving_map.hpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/insertion_order_preserving_map.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/unordered_map.hpp" +#include "duckdb/common/unordered_set.hpp" +#include "duckdb/common/string.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/helper.hpp" +#include "duckdb/common/case_insensitive_map.hpp" + +namespace duckdb { + +template +class InsertionOrderPreservingMap : public vector { +public: + InsertionOrderPreservingMap() { + } + case_insensitive_map_t map_idx; + +public: + vector Keys() const { + vector keys; + keys.resize(this->size()); + for (auto &kv : map_idx) { + keys[kv.second] = kv.first; + } + + return keys; + } +}; + +} // namespace duckdb diff --git a/src/include/duckdb/common/serializer/deserializer.hpp b/src/include/duckdb/common/serializer/deserializer.hpp index e4096878a810..c5d88a07e417 100644 --- a/src/include/duckdb/common/serializer/deserializer.hpp +++ b/src/include/duckdb/common/serializer/deserializer.hpp @@ -302,6 +302,25 @@ class Deserializer { return map; } + template + inline typename std::enable_if::value, T>::type Read() { + using KEY_TYPE = typename is_insertion_preserving_map::KEY_TYPE; + using VALUE_TYPE = typename is_insertion_preserving_map::VALUE_TYPE; + + T map; + auto size = OnListBegin(); + for (idx_t i = 0; i < size; i++) { + OnObjectBegin(); + auto key = ReadProperty(0, "key"); + auto value = ReadProperty(1, "value"); + OnObjectEnd(); + map.push_back(std::move(value)); + map.map_idx[key] = i; + } + OnListEnd(); + return map; + } + // Deserialize an unordered set template inline typename std::enable_if::value, T>::type Read() { diff --git a/src/include/duckdb/common/serializer/serialization_traits.hpp b/src/include/duckdb/common/serializer/serialization_traits.hpp index 616d90f2320f..83c85e07df23 100644 --- a/src/include/duckdb/common/serializer/serialization_traits.hpp +++ b/src/include/duckdb/common/serializer/serialization_traits.hpp @@ -12,6 +12,7 @@ #include "duckdb/common/unique_ptr.hpp" #include "duckdb/common/optional_ptr.hpp" #include "duckdb/common/optional_idx.hpp" +#include "duckdb/common/insertion_order_preserving_map.hpp" namespace duckdb { @@ -92,6 +93,14 @@ struct is_map> : std::true_type { typedef typename std::tuple_element<3, std::tuple>::type EQUAL_TYPE; }; +template +struct is_insertion_preserving_map : std::false_type {}; +template +struct is_insertion_preserving_map> : std::true_type { + typedef typename std::tuple_element<0, std::tuple>::type KEY_TYPE; + typedef typename std::tuple_element<1, std::tuple>::type VALUE_TYPE; +}; + template struct is_unique_ptr : std::false_type {}; template @@ -253,6 +262,16 @@ struct SerializationDefaultValue { return value.empty(); } + template + static inline typename std::enable_if::value, T>::type GetDefault() { + return T(); + } + + template + static inline bool IsDefault(const typename std::enable_if::value, T>::type &value) { + return value.empty(); + } + template static inline typename std::enable_if::value, T>::type GetDefault() { return T(); diff --git a/src/include/duckdb/common/serializer/serializer.hpp b/src/include/duckdb/common/serializer/serializer.hpp index f791b4a892df..88807cb38b6e 100644 --- a/src/include/duckdb/common/serializer/serializer.hpp +++ b/src/include/duckdb/common/serializer/serializer.hpp @@ -18,6 +18,7 @@ #include "duckdb/common/optional_idx.hpp" #include "duckdb/common/value_operations/value_operations.hpp" #include "duckdb/execution/operator/csv_scanner/csv_option.hpp" +#include "duckdb/common/insertion_order_preserving_map.hpp" namespace duckdb { @@ -259,6 +260,22 @@ class Serializer { OnListEnd(); } + // Map + // serialized as a list of pairs + template + void WriteValue(const duckdb::InsertionOrderPreservingMap &map) { + auto count = map.size(); + auto keys = map.Keys(); + OnListBegin(count); + for (idx_t i = 0; i < count; i++) { + OnObjectBegin(); + WriteProperty(0, "key", keys[i]); + WriteProperty(1, "value", map[i]); + OnObjectEnd(); + } + OnListEnd(); + } + // class or struct implementing `Serialize(Serializer& Serializer)`; template typename std::enable_if::value>::type WriteValue(const T &value) { diff --git a/src/include/duckdb/parser/query_node.hpp b/src/include/duckdb/parser/query_node.hpp index 74187940268d..e9da26a1fb64 100644 --- a/src/include/duckdb/parser/query_node.hpp +++ b/src/include/duckdb/parser/query_node.hpp @@ -12,7 +12,7 @@ #include "duckdb/parser/parsed_expression.hpp" #include "duckdb/parser/result_modifier.hpp" #include "duckdb/parser/common_table_expression_info.hpp" -#include "duckdb/common/case_insensitive_map.hpp" +#include "duckdb/common/insertion_order_preserving_map.hpp" #include "duckdb/common/exception.hpp" namespace duckdb { @@ -34,8 +34,7 @@ class CommonTableExpressionMap { public: CommonTableExpressionMap(); - vector> map; - case_insensitive_map_t map_idx; + InsertionOrderPreservingMap> map; public: string ToString() const; diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 3d7372342d87..37a84d723f45 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -54,12 +54,7 @@ { "id": 100, "name": "map", - "type": "vector" - }, - { - "id": 101, - "name": "map_idx", - "type": "case_insensitive_map_t" + "type": "InsertionOrderPreservingMap" } ], "pointer_type": "none" diff --git a/src/parser/query_node.cpp b/src/parser/query_node.cpp index f1b84eed3942..1c87b6d4a452 100644 --- a/src/parser/query_node.cpp +++ b/src/parser/query_node.cpp @@ -16,7 +16,7 @@ CommonTableExpressionMap::CommonTableExpressionMap() { CommonTableExpressionMap CommonTableExpressionMap::Copy() const { CommonTableExpressionMap res; res.map.resize(this->map.size()); - for (auto &kv_idx : this->map_idx) { + for (auto &kv_idx : this->map.map_idx) { auto kv_info = make_uniq(); auto &kv = this->map.at(kv_idx.second); for (auto &al : kv->aliases) { @@ -25,7 +25,7 @@ CommonTableExpressionMap CommonTableExpressionMap::Copy() const { kv_info->query = unique_ptr_cast(kv->query->Copy()); kv_info->materialized = kv->materialized; res.map[kv_idx.second] = std::move(kv_info); - res.map_idx[kv_idx.first] = kv_idx.second; + res.map.map_idx[kv_idx.first] = kv_idx.second; } return res; @@ -49,11 +49,7 @@ string CommonTableExpressionMap::ToString() const { } bool first_cte = true; - vector names; - names.resize(map.size()); - for (auto &kv : map_idx) { - names[kv.second] = kv.first; - } + vector names = map.Keys(); for (idx_t i = 0; i < map.size(); i++) { auto &kv = map[i]; @@ -163,7 +159,7 @@ void QueryNode::CopyProperties(QueryNode &other) const { other.modifiers.push_back(modifier->Copy()); } other.cte_map.map.resize(cte_map.map.size()); - for (auto &kv_idx : cte_map.map_idx) { + for (auto &kv_idx : cte_map.map.map_idx) { auto &kv = cte_map.map[kv_idx.second]; auto kv_info = make_uniq(); for (auto &al : kv->aliases) { @@ -172,7 +168,7 @@ void QueryNode::CopyProperties(QueryNode &other) const { kv_info->query = unique_ptr_cast(kv->query->Copy()); kv_info->materialized = kv->materialized; other.cte_map.map[kv_idx.second] = std::move(kv_info); - other.cte_map.map_idx[kv_idx.first] = kv_idx.second; + other.cte_map.map.map_idx[kv_idx.first] = kv_idx.second; } } diff --git a/src/parser/transform/helpers/transform_cte.cpp b/src/parser/transform/helpers/transform_cte.cpp index 16b732674b1a..81df4f101723 100644 --- a/src/parser/transform/helpers/transform_cte.cpp +++ b/src/parser/transform/helpers/transform_cte.cpp @@ -17,13 +17,13 @@ unique_ptr CommonTableExpressionInfo::Copy() { void Transformer::ExtractCTEsRecursive(CommonTableExpressionMap &cte_map) { for (auto &cte_entry : stored_cte_map) { - for (auto &entry : cte_entry->map_idx) { - auto found_entry = cte_map.map_idx.find(entry.first); - if (found_entry != cte_map.map_idx.end()) { + for (auto &entry : cte_entry->map.map_idx) { + auto found_entry = cte_map.map.map_idx.find(entry.first); + if (found_entry != cte_map.map.map_idx.end()) { // entry already present - use top-most entry continue; } - cte_map.map_idx[entry.first] = cte_map.map.size(); + cte_map.map.map_idx[entry.first] = cte_map.map.size(); cte_map.map.push_back(cte_entry->map[entry.second]->Copy()); } } @@ -77,8 +77,8 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause &de_with_clause, D_ASSERT(info->query); auto cte_name = string(cte.ctename); - auto it = cte_map.map_idx.find(cte_name); - if (it != cte_map.map_idx.end()) { + auto it = cte_map.map.map_idx.find(cte_name); + if (it != cte_map.map.map_idx.end()) { // can't have two CTEs with same name throw ParserException("Duplicate CTE name \"%s\"", cte_name); } @@ -91,7 +91,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause &de_with_clause, info->materialized = CTEMaterialize::CTE_MATERIALIZE_ALWAYS; } - cte_map.map_idx[cte_name] = cte_map.map.size(); + cte_map.map.map_idx[cte_name] = cte_map.map.size(); cte_map.map.push_back(std::move(info)); } } diff --git a/src/parser/transformer.cpp b/src/parser/transformer.cpp index 924eb155d2a6..64e965db1d42 100644 --- a/src/parser/transformer.cpp +++ b/src/parser/transformer.cpp @@ -225,11 +225,7 @@ unique_ptr Transformer::TransformStatementInternal(duckdb_libpgque unique_ptr Transformer::TransformMaterializedCTE(unique_ptr root) { // Extract materialized CTEs from cte_map vector> materialized_ctes; - vector names; - names.resize(root->cte_map.map_idx.size()); - for (auto &kv : root->cte_map.map_idx) { - names[kv.second] = kv.first; - } + vector names = root->cte_map.map.Keys(); for (idx_t i = 0; i < root->cte_map.map.size(); i++) { auto &cte = root->cte_map.map[i]; diff --git a/src/planner/binder.cpp b/src/planner/binder.cpp index a85182af94f4..0e8444a90cc4 100644 --- a/src/planner/binder.cpp +++ b/src/planner/binder.cpp @@ -72,7 +72,7 @@ unique_ptr Binder::BindMaterializedCTE(CommonTableExpressionMap &c vector> materialized_ctes; vector names; names.resize(cte_map.map.size()); - for (auto &kv : cte_map.map_idx) { + for (auto &kv : cte_map.map.map_idx) { names[kv.second] = kv.first; } @@ -201,7 +201,7 @@ BoundStatement Binder::Bind(SQLStatement &statement) { } void Binder::AddCTEMap(CommonTableExpressionMap &cte_map) { - for (auto &cte_it : cte_map.map_idx) { + for (auto &cte_it : cte_map.map.map_idx) { AddCTE(cte_it.first, *cte_map.map[cte_it.second]); } } diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index 59da2558b4b2..1af866970dfc 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -272,14 +272,12 @@ unique_ptr CommonTableExpressionInfo::Deserialize(Des } void CommonTableExpressionMap::Serialize(Serializer &serializer) const { - serializer.WritePropertyWithDefault>>(100, "map", map); - serializer.WritePropertyWithDefault>(101, "map_idx", map_idx); + serializer.WritePropertyWithDefault>>(100, "map", map); } CommonTableExpressionMap CommonTableExpressionMap::Deserialize(Deserializer &deserializer) { CommonTableExpressionMap result; - deserializer.ReadPropertyWithDefault>>(100, "map", result.map); - deserializer.ReadPropertyWithDefault>(101, "map_idx", result.map_idx); + deserializer.ReadPropertyWithDefault>>(100, "map", result.map); return result; }