Skip to content

Commit

Permalink
Add InsertionOrderPreservingMap for CTEs
Browse files Browse the repository at this point in the history
This commit adds an insertion order preserving map,
while keeping the serialization format of a regular map.
  • Loading branch information
kryonix committed May 2, 2024
1 parent 370dfb2 commit 55f3a7e
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 37 deletions.
2 changes: 1 addition & 1 deletion extension/sqlsmith/statement_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) {
for (idx_t i = 0; i < 1 + RandomValue(10); i++) {
cte->aliases.push_back(GenerateIdentifier());
}
node.cte_map.map_idx[GenerateTableIdentifier()] = node.cte_map.map.size();
node.cte_map.map.map_idx[GenerateTableIdentifier()] = node.cte_map.map.size();
node.cte_map.map.push_back(std::move(cte));
}
}
Expand Down
39 changes: 39 additions & 0 deletions src/include/duckdb/common/insertion_order_preserving_map.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/insertion_order_preserving_map.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/case_insensitive_map.hpp"

namespace duckdb {

//! A vector of values kept in insertion order, paired with a name -> position index.
//! The values are the elements of the vector this class derives from; map_idx records,
//! for each name, the position of the corresponding value inside that vector.
//! NOTE(review): K is the type stored in map_idx (i.e. the position type), not the type of the
//! names. Names are treated as strings by Keys() - confirm against case_insensitive_map_t.
//! Nothing in this class keeps map_idx and the vector contents in sync: code that appends a value
//! has to register its name in map_idx itself.
template <typename K, typename V>
class InsertionOrderPreservingMap : public vector<V> {
public:
	InsertionOrderPreservingMap() {
	}

	//! Maps each name to the position of its value in the underlying vector
	case_insensitive_map_t<K> map_idx;

public:
	//! Returns the names arranged by the position recorded for them in map_idx.
	//! The result has one slot per stored value; a slot whose position has no entry in map_idx
	//! keeps its default-constructed string.
	vector<string> Keys() const {
		vector<string> ordered_names;
		ordered_names.resize(this->size());
		for (auto entry = map_idx.begin(); entry != map_idx.end(); ++entry) {
			// entry->second is the position, entry->first the name registered for it
			ordered_names[entry->second] = entry->first;
		}
		return ordered_names;
	}
};

} // namespace duckdb
19 changes: 19 additions & 0 deletions src/include/duckdb/common/serializer/deserializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,25 @@ class Deserializer {
return map;
}

// Deserialize an insertion order preserving map
// Reads a list of {key, value} objects: every value is appended in list order and its key is
// mapped to the position at which the value was appended
template <typename T = void>
inline typename std::enable_if<is_insertion_preserving_map<T>::value, T>::type Read() {
	using VALUE_TYPE = typename is_insertion_preserving_map<T>::VALUE_TYPE;

	T map;
	auto size = OnListBegin();
	for (idx_t i = 0; i < size; i++) {
		OnObjectBegin();
		// keys are always read as strings, so the trait's KEY_TYPE is not needed here
		auto key = ReadProperty<string>(0, "key");
		auto value = ReadProperty<VALUE_TYPE>(1, "value");
		OnObjectEnd();
		map.push_back(std::move(value));
		// i is the position the value was just appended at
		map.map_idx[key] = i;
	}
	OnListEnd();
	return map;
}

// Deserialize an unordered set
template <typename T = void>
inline typename std::enable_if<is_unordered_set<T>::value, T>::type Read() {
Expand Down
19 changes: 19 additions & 0 deletions src/include/duckdb/common/serializer/serialization_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "duckdb/common/unique_ptr.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/optional_idx.hpp"
#include "duckdb/common/insertion_order_preserving_map.hpp"

namespace duckdb {

Expand Down Expand Up @@ -92,6 +93,14 @@ struct is_map<typename duckdb::map<Args...>> : std::true_type {
typedef typename std::tuple_element<3, std::tuple<Args...>>::type EQUAL_TYPE;
};

template <typename T>
struct is_insertion_preserving_map : std::false_type {};
template <typename... Args>
struct is_insertion_preserving_map<typename duckdb::InsertionOrderPreservingMap<Args...>> : std::true_type {
typedef typename std::tuple_element<0, std::tuple<Args...>>::type KEY_TYPE;
typedef typename std::tuple_element<1, std::tuple<Args...>>::type VALUE_TYPE;
};

template <typename T>
struct is_unique_ptr : std::false_type {};
template <typename T>
Expand Down Expand Up @@ -253,6 +262,16 @@ struct SerializationDefaultValue {
return value.empty();
}

// Default value of an insertion order preserving map: a default-constructed map
template <typename T = void>
static inline typename std::enable_if<is_insertion_preserving_map<T>::value, T>::type GetDefault() {
	T default_map;
	return default_map;
}

// An insertion order preserving map is considered default when empty() reports no values
template <typename T = void>
static inline bool IsDefault(const typename std::enable_if<is_insertion_preserving_map<T>::value, T>::type &value) {
	const bool has_no_values = value.empty();
	return has_no_values;
}

template <typename T = void>
static inline typename std::enable_if<std::is_same<T, string>::value, T>::type GetDefault() {
return T();
Expand Down
17 changes: 17 additions & 0 deletions src/include/duckdb/common/serializer/serializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "duckdb/common/optional_idx.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "duckdb/execution/operator/csv_scanner/csv_option.hpp"
#include "duckdb/common/insertion_order_preserving_map.hpp"

namespace duckdb {

Expand Down Expand Up @@ -259,6 +260,22 @@ class Serializer {
OnListEnd();
}

// Insertion order preserving map
// serialized as a list of {key, value} objects, written in position order
template <class K, class V>
void WriteValue(const duckdb::InsertionOrderPreservingMap<K, V> &map) {
	// names[pos] is the key registered for the value stored at position pos
	auto names = map.Keys();
	auto entry_count = map.size();
	OnListBegin(entry_count);
	idx_t pos = 0;
	while (pos < entry_count) {
		OnObjectBegin();
		WriteProperty(0, "key", names[pos]);
		WriteProperty(1, "value", map[pos]);
		OnObjectEnd();
		pos++;
	}
	OnListEnd();
}

// class or struct implementing `Serialize(Serializer& Serializer)`;
template <typename T>
typename std::enable_if<has_serialize<T>::value>::type WriteValue(const T &value) {
Expand Down
5 changes: 2 additions & 3 deletions src/include/duckdb/parser/query_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "duckdb/parser/parsed_expression.hpp"
#include "duckdb/parser/result_modifier.hpp"
#include "duckdb/parser/common_table_expression_info.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/insertion_order_preserving_map.hpp"
#include "duckdb/common/exception.hpp"

namespace duckdb {
Expand All @@ -34,8 +34,7 @@ class CommonTableExpressionMap {
public:
CommonTableExpressionMap();

vector<unique_ptr<CommonTableExpressionInfo>> map;
case_insensitive_map_t<idx_t> map_idx;
InsertionOrderPreservingMap<idx_t, unique_ptr<CommonTableExpressionInfo>> map;

public:
string ToString() const;
Expand Down
7 changes: 1 addition & 6 deletions src/include/duckdb/storage/serialization/nodes.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,7 @@
{
"id": 100,
"name": "map",
"type": "vector<CommonTableExpressionInfo*>"
},
{
"id": 101,
"name": "map_idx",
"type": "case_insensitive_map_t<idx_t>"
"type": "InsertionOrderPreservingMap<idx_t, CommonTableExpressionInfo*>"
}
],
"pointer_type": "none"
Expand Down
14 changes: 5 additions & 9 deletions src/parser/query_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ CommonTableExpressionMap::CommonTableExpressionMap() {
CommonTableExpressionMap CommonTableExpressionMap::Copy() const {
CommonTableExpressionMap res;
res.map.resize(this->map.size());
for (auto &kv_idx : this->map_idx) {
for (auto &kv_idx : this->map.map_idx) {
auto kv_info = make_uniq<CommonTableExpressionInfo>();
auto &kv = this->map.at(kv_idx.second);
for (auto &al : kv->aliases) {
Expand All @@ -25,7 +25,7 @@ CommonTableExpressionMap CommonTableExpressionMap::Copy() const {
kv_info->query = unique_ptr_cast<SQLStatement, SelectStatement>(kv->query->Copy());
kv_info->materialized = kv->materialized;
res.map[kv_idx.second] = std::move(kv_info);
res.map_idx[kv_idx.first] = kv_idx.second;
res.map.map_idx[kv_idx.first] = kv_idx.second;
}

return res;
Expand All @@ -49,11 +49,7 @@ string CommonTableExpressionMap::ToString() const {
}
bool first_cte = true;

vector<string> names;
names.resize(map.size());
for (auto &kv : map_idx) {
names[kv.second] = kv.first;
}
vector<string> names = map.Keys();

for (idx_t i = 0; i < map.size(); i++) {
auto &kv = map[i];
Expand Down Expand Up @@ -163,7 +159,7 @@ void QueryNode::CopyProperties(QueryNode &other) const {
other.modifiers.push_back(modifier->Copy());
}
other.cte_map.map.resize(cte_map.map.size());
for (auto &kv_idx : cte_map.map_idx) {
for (auto &kv_idx : cte_map.map.map_idx) {
auto &kv = cte_map.map[kv_idx.second];
auto kv_info = make_uniq<CommonTableExpressionInfo>();
for (auto &al : kv->aliases) {
Expand All @@ -172,7 +168,7 @@ void QueryNode::CopyProperties(QueryNode &other) const {
kv_info->query = unique_ptr_cast<SQLStatement, SelectStatement>(kv->query->Copy());
kv_info->materialized = kv->materialized;
other.cte_map.map[kv_idx.second] = std::move(kv_info);
other.cte_map.map_idx[kv_idx.first] = kv_idx.second;
other.cte_map.map.map_idx[kv_idx.first] = kv_idx.second;
}
}

Expand Down
14 changes: 7 additions & 7 deletions src/parser/transform/helpers/transform_cte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ unique_ptr<CommonTableExpressionInfo> CommonTableExpressionInfo::Copy() {

void Transformer::ExtractCTEsRecursive(CommonTableExpressionMap &cte_map) {
for (auto &cte_entry : stored_cte_map) {
for (auto &entry : cte_entry->map_idx) {
auto found_entry = cte_map.map_idx.find(entry.first);
if (found_entry != cte_map.map_idx.end()) {
for (auto &entry : cte_entry->map.map_idx) {
auto found_entry = cte_map.map.map_idx.find(entry.first);
if (found_entry != cte_map.map.map_idx.end()) {
// entry already present - use top-most entry
continue;
}
cte_map.map_idx[entry.first] = cte_map.map.size();
cte_map.map.map_idx[entry.first] = cte_map.map.size();
cte_map.map.push_back(cte_entry->map[entry.second]->Copy());
}
}
Expand Down Expand Up @@ -77,8 +77,8 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause &de_with_clause,
D_ASSERT(info->query);
auto cte_name = string(cte.ctename);

auto it = cte_map.map_idx.find(cte_name);
if (it != cte_map.map_idx.end()) {
auto it = cte_map.map.map_idx.find(cte_name);
if (it != cte_map.map.map_idx.end()) {
// can't have two CTEs with same name
throw ParserException("Duplicate CTE name \"%s\"", cte_name);
}
Expand All @@ -91,7 +91,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause &de_with_clause,
info->materialized = CTEMaterialize::CTE_MATERIALIZE_ALWAYS;
}

cte_map.map_idx[cte_name] = cte_map.map.size();
cte_map.map.map_idx[cte_name] = cte_map.map.size();
cte_map.map.push_back(std::move(info));
}
}
Expand Down
6 changes: 1 addition & 5 deletions src/parser/transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,7 @@ unique_ptr<SQLStatement> Transformer::TransformStatementInternal(duckdb_libpgque
unique_ptr<QueryNode> Transformer::TransformMaterializedCTE(unique_ptr<QueryNode> root) {
// Extract materialized CTEs from cte_map
vector<unique_ptr<CTENode>> materialized_ctes;
vector<string> names;
names.resize(root->cte_map.map_idx.size());
for (auto &kv : root->cte_map.map_idx) {
names[kv.second] = kv.first;
}
vector<string> names = root->cte_map.map.Keys();

for (idx_t i = 0; i < root->cte_map.map.size(); i++) {
auto &cte = root->cte_map.map[i];
Expand Down
4 changes: 2 additions & 2 deletions src/planner/binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &c
vector<unique_ptr<CTENode>> materialized_ctes;
vector<string> names;
names.resize(cte_map.map.size());
for (auto &kv : cte_map.map_idx) {
for (auto &kv : cte_map.map.map_idx) {
names[kv.second] = kv.first;
}

Expand Down Expand Up @@ -201,7 +201,7 @@ BoundStatement Binder::Bind(SQLStatement &statement) {
}

void Binder::AddCTEMap(CommonTableExpressionMap &cte_map) {
for (auto &cte_it : cte_map.map_idx) {
for (auto &cte_it : cte_map.map.map_idx) {
AddCTE(cte_it.first, *cte_map.map[cte_it.second]);
}
}
Expand Down
6 changes: 2 additions & 4 deletions src/storage/serialization/serialize_nodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,14 +272,12 @@ unique_ptr<CommonTableExpressionInfo> CommonTableExpressionInfo::Deserialize(Des
}

void CommonTableExpressionMap::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<vector<unique_ptr<CommonTableExpressionInfo>>>(100, "map", map);
serializer.WritePropertyWithDefault<case_insensitive_map_t<idx_t>>(101, "map_idx", map_idx);
serializer.WritePropertyWithDefault<InsertionOrderPreservingMap<idx_t, unique_ptr<CommonTableExpressionInfo>>>(100, "map", map);
}

CommonTableExpressionMap CommonTableExpressionMap::Deserialize(Deserializer &deserializer) {
CommonTableExpressionMap result;
deserializer.ReadPropertyWithDefault<vector<unique_ptr<CommonTableExpressionInfo>>>(100, "map", result.map);
deserializer.ReadPropertyWithDefault<case_insensitive_map_t<idx_t>>(101, "map_idx", result.map_idx);
deserializer.ReadPropertyWithDefault<InsertionOrderPreservingMap<idx_t, unique_ptr<CommonTableExpressionInfo>>>(100, "map", result.map);
return result;
}

Expand Down

0 comments on commit 55f3a7e

Please sign in to comment.