From 1e08a2e7bfab06be625f2efde5159b78eba24bcd Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Thu, 23 May 2024 18:49:44 +0000
Subject: [PATCH 01/11] Implement serialization for structurized arrays

---
 components/core/src/clp_s/JsonSerializer.hpp |  14 +-
 components/core/src/clp_s/SchemaReader.cpp   | 251 ++++++++++++++++++-
 components/core/src/clp_s/SchemaReader.hpp   |  24 ++
 3 files changed, 284 insertions(+), 5 deletions(-)

diff --git a/components/core/src/clp_s/JsonSerializer.hpp b/components/core/src/clp_s/JsonSerializer.hpp
index 4b7ada80d..b5fe866b5 100644
--- a/components/core/src/clp_s/JsonSerializer.hpp
+++ b/components/core/src/clp_s/JsonSerializer.hpp
@@ -23,7 +23,9 @@ class JsonSerializer {
         AddStringValue,
         AddNullValue,
         BeginArray,
-        EndArray
+        EndArray,
+        BeginDocument,
+        BeginArrayDocument,
     };
 
     static int64_t const cReservedLength = 4096;
@@ -76,19 +78,25 @@ class JsonSerializer {
     void end_document() { m_json_string[m_json_string.size() - 1] = '}'; }
 
     void end_object() {
-        if (m_op_list[m_op_list_index - 2] != BeginObject) {
+        if (m_op_list[m_op_list_index - 2] != BeginObject
+            && m_op_list[m_op_list_index - 2] != BeginDocument)
+        {
             m_json_string.pop_back();
         }
         m_json_string += "},";
     }
 
+    void begin_array_document() { m_json_string += "["; }
+
     void begin_array() {
         append_key();
         m_json_string += "[";
     }
 
     void end_array() {
-        if (m_op_list[m_op_list_index - 2] != BeginArray) {
+        if (m_op_list[m_op_list_index - 2] != BeginArray
+            && m_op_list[m_op_list_index - 2] != BeginArrayDocument)
+        {
             m_json_string.pop_back();
         }
         m_json_string += "],";
diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index dfe3b1934..5ca6674d8 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -72,6 +72,10 @@ void SchemaReader::generate_json_string() {
                 m_json_serializer.end_object();
                 break;
             }
+            case JsonSerializer::Op::BeginDocument: {
+                m_json_serializer.begin_document();
+                break;
+            }
             case JsonSerializer::Op::BeginArray: {
                 m_json_serializer.begin_array();
                 break;
@@ -80,6 +84,10 @@ void SchemaReader::generate_json_string() {
                 m_json_serializer.end_array();
                 break;
             }
+            case JsonSerializer::Op::BeginArrayDocument: {
+                m_json_serializer.begin_array_document();
+                break;
+            }
             case JsonSerializer::Op::AddIntField: {
                 column = m_reordered_columns[column_id_index++];
                 auto const& name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
@@ -294,6 +302,236 @@ int32_t SchemaReader::get_first_column_in_span(std::span<int32_t> schema) {
     return -1;
 }
 
+void SchemaReader::find_intersection_and_fix_brackets(
+        int32_t cur_root,
+        int32_t next_root,
+        std::vector<int32_t>& path_to_intersection
+) {
+    auto const* cur_node = &m_global_schema_tree->get_node(cur_root);
+    auto const* next_node = &m_global_schema_tree->get_node(next_root);
+    while (cur_node->get_parent_id() != next_node->get_parent_id()) {
+        if (cur_node->get_depth() > next_node->get_depth()) {
+            cur_root = cur_node->get_parent_id();
+            cur_node = &m_global_schema_tree->get_node(cur_root);
+            m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+        } else if (cur_node->get_depth() < next_node->get_depth()) {
+            path_to_intersection.push_back(next_root);
+            next_root = next_node->get_parent_id();
+            next_node = &m_global_schema_tree->get_node(next_root);
+        } else {
+            cur_root = cur_node->get_parent_id();
+            cur_node = &m_global_schema_tree->get_node(cur_root);
+            m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+            path_to_intersection.push_back(next_root);
+            next_root = next_node->get_parent_id();
+            next_node = &m_global_schema_tree->get_node(next_root);
+        }
+    }
+
+    for (auto it = path_to_intersection.rbegin(); it != path_to_intersection.rend(); ++it) {
+        auto const& node = m_global_schema_tree->get_node(*it);
+        bool no_name = true;
+        if (false == node.get_key_name().empty()) {
+            m_json_serializer.add_special_key(node.get_key_name());
+            no_name = false;
+        }
+        if (NodeType::Object == node.get_type()) {
+            m_json_serializer.add_op(
+                    no_name ? JsonSerializer::Op::BeginDocument : JsonSerializer::Op::BeginObject
+            );
+        } else if (NodeType::StructuredArray == node.get_type()) {
+            m_json_serializer.add_op(
+                    no_name ? JsonSerializer::Op::BeginArrayDocument
+                            : JsonSerializer::Op::BeginArray
+            );
+        }
+    }
+    path_to_intersection.clear();
+}
+
+size_t SchemaReader::generate_structured_array_template(
+        int32_t array_root,
+        size_t column_start,
+        std::span<int32_t> schema
+) {
+    size_t column_idx = column_start;
+    std::vector<int32_t> path_to_intersection;
+    int32_t depth = m_global_schema_tree->get_node(array_root).get_depth();
+
+    for (size_t i = 0; i < schema.size(); ++i) {
+        int32_t global_column_id = schema[i];
+        if (Schema::schema_entry_is_unordered_object(global_column_id)) {
+            auto type = Schema::get_unordered_object_type(global_column_id);
+            size_t length = Schema::get_unordered_object_length(global_column_id);
+            auto sub_object_schema = schema.subspan(i + 1, length);
+            if (NodeType::StructuredArray == type) {
+                int32_t sub_array_root
+                        = m_global_schema_tree->find_matching_subtree_root_in_subtree(
+                                array_root,
+                                get_first_column_in_span(sub_object_schema),
+                                NodeType::StructuredArray
+                        );
+                m_json_serializer.add_op(JsonSerializer::Op::BeginArrayDocument);
+                column_idx = generate_structured_array_template(
+                        sub_array_root,
+                        column_idx,
+                        sub_object_schema
+                );
+                m_json_serializer.add_op(JsonSerializer::Op::EndArray);
+            } else if (NodeType::Object == type) {
+                int32_t object_root = m_global_schema_tree->find_matching_subtree_root_in_subtree(
+                        array_root,
+                        get_first_column_in_span(sub_object_schema),
+                        NodeType::Object
+                );
+                m_json_serializer.add_op(JsonSerializer::Op::BeginDocument);
+                column_idx = generate_structured_object_template(
+                        object_root,
+                        column_idx,
+                        sub_object_schema
+                );
+                m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+            }
+            i += length;
+        } else {
+            auto const& node = m_global_schema_tree->get_node(global_column_id);
+            switch (node.get_type()) {
+                case NodeType::Object: {
+                    find_intersection_and_fix_brackets(
+                            array_root,
+                            node.get_id(),
+                            path_to_intersection
+                    );
+                    for (int j = 0; j < (node.get_depth() - depth); ++j) {
+                        m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+                    }
+                    break;
+                }
+                case NodeType::StructuredArray: {
+                    m_json_serializer.add_op(JsonSerializer::Op::BeginArrayDocument);
+                    m_json_serializer.add_op(JsonSerializer::Op::EndArray);
+                    break;
+                }
+                case NodeType::Integer: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddIntValue);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::Float: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddFloatValue);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::Boolean: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddBoolValue);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::ClpString:
+                case NodeType::VarString: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddStringValue);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::NullValue: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddNullValue);
+                    break;
+                }
+                case NodeType::DateString:
+                case NodeType::UnstructuredArray:
+                case NodeType::Unknown:
+                    break;
+            }
+        }
+    }
+    return column_idx;
+}
+
+size_t SchemaReader::generate_structured_object_template(
+        int32_t object_root,
+        size_t column_start,
+        std::span<int32_t> schema
+) {
+    int32_t root = object_root;
+    size_t column_idx = column_start;
+    std::vector<int32_t> path_to_intersection;
+
+    for (size_t i = 0; i < schema.size(); ++i) {
+        int32_t global_column_id = schema[i];
+        if (Schema::schema_entry_is_unordered_object(global_column_id)) {
+            // It should only be possible to encounter arrays inside of structured objects
+            size_t array_length = Schema::get_unordered_object_length(global_column_id);
+            auto array_schema = schema.subspan(i + 1, array_length);
+            // we can guarantee that the last array we hit on the path to object root must be the
+            // right one because otherwise we'd be inside the structured array generator
+            int32_t array_root = m_global_schema_tree->find_matching_subtree_root_in_subtree(
+                    object_root,
+                    get_first_column_in_span(array_schema),
+                    NodeType::StructuredArray
+            );
+
+            find_intersection_and_fix_brackets(root, array_root, path_to_intersection);
+            column_idx = generate_structured_array_template(array_root, column_idx, array_schema);
+            m_json_serializer.add_op(JsonSerializer::Op::EndArray);
+            i += array_length;
+            // root is parent of the array object since we close the array bracket above
+            auto const& node = m_global_schema_tree->get_node(array_root);
+            root = node.get_parent_id();
+        } else {
+            auto const& node = m_global_schema_tree->get_node(global_column_id);
+            int32_t next_root = node.get_parent_id();
+            find_intersection_and_fix_brackets(root, next_root, path_to_intersection);
+            root = next_root;
+            switch (node.get_type()) {
+                case NodeType::Object: {
+                    m_json_serializer.add_op(JsonSerializer::Op::BeginObject);
+                    m_json_serializer.add_special_key(node.get_key_name());
+                    m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+                    break;
+                }
+                case NodeType::StructuredArray: {
+                    m_json_serializer.add_op(JsonSerializer::Op::BeginArray);
+                    m_json_serializer.add_special_key(node.get_key_name());
+                    m_json_serializer.add_op(JsonSerializer::Op::EndArray);
+                    break;
+                }
+                case NodeType::Integer: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddIntField);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::Float: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddFloatField);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::Boolean: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddBoolField);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::ClpString:
+                case NodeType::VarString: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddStringField);
+                    m_reordered_columns.push_back(m_columns[column_idx++]);
+                    break;
+                }
+                case NodeType::NullValue: {
+                    m_json_serializer.add_op(JsonSerializer::Op::AddNullField);
+                    m_json_serializer.add_special_key(node.get_key_name());
+                    break;
+                }
+                case NodeType::DateString:
+                case NodeType::UnstructuredArray:
+                case NodeType::Unknown:
+                    break;
+            }
+        }
+    }
+    find_intersection_and_fix_brackets(root, object_root, path_to_intersection);
+    return column_idx;
+}
+
 void SchemaReader::initialize_serializer() {
     if (m_serializer_initialized) {
         return;
@@ -339,10 +577,19 @@ void SchemaReader::generate_json_template(int32_t id) {
                 break;
             }
             case NodeType::StructuredArray: {
-                // Note: Marshalling structured arrays is left intentionally stubbed out so that we
-                // can split up the PR for supporting structurized arrays.
                 m_json_serializer.add_op(JsonSerializer::Op::BeginArray);
                 m_json_serializer.add_special_key(key);
+                int32_t global_child_id = m_local_id_to_global_id[child_id];
+                auto structured_it = m_global_id_to_unordered_object.find(global_child_id);
+                if (m_global_id_to_unordered_object.end() != structured_it) {
+                    size_t column_start = structured_it->second.first;
+                    std::span<int32_t> structured_schema = structured_it->second.second;
+                    generate_structured_array_template(
+                            global_child_id,
+                            column_start,
+                            structured_schema
+                    );
+                }
                 m_json_serializer.add_op(JsonSerializer::Op::EndArray);
                 break;
             }
diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
index 6ea5f57df..1e5e6a349 100644
--- a/components/core/src/clp_s/SchemaReader.hpp
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -193,6 +193,24 @@ class SchemaReader {
      */
     void generate_json_template(int32_t id);
 
+    /**
+     * Generates a json template for a structured array
+     * @param id
+     * @param column_start
+     * @param schema
+     */
+    size_t
+    generate_structured_array_template(int32_t id, size_t column_start, std::span<int32_t> schema);
+
+    /**
+     * Generates a json template for a structured object
+     * @param id
+     * @param column_start
+     * @param schema
+     */
+    size_t
+    generate_structured_object_template(int32_t id, size_t column_start, std::span<int32_t> schema);
+
     /**
      * @param schema
      * @return the first column ID found in the given schema, or -1 if the schema contains no
@@ -200,6 +218,12 @@ class SchemaReader {
      */
     static inline int32_t get_first_column_in_span(std::span<int32_t> schema);
 
+    void find_intersection_and_fix_brackets(
+            int32_t cur_root,
+            int32_t next_root,
+            std::vector<int32_t>& path_to_intersection
+    );
+
     /**
      * Generates a json string from the extracted values
      */

From a906592f0f1ae1ff47a02f03c20b69f87c0420be Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Fri, 24 May 2024 20:06:48 +0000
Subject: [PATCH 02/11] Rename JsonSerializer ops BeginDocument and
 BeginArrayDocument to BeginUnnamedObject and BeginUnnamedArray

---
 components/core/src/clp_s/JsonSerializer.hpp |  8 ++++----
 components/core/src/clp_s/SchemaReader.cpp   | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/components/core/src/clp_s/JsonSerializer.hpp b/components/core/src/clp_s/JsonSerializer.hpp
index b5fe866b5..01a8a1e74 100644
--- a/components/core/src/clp_s/JsonSerializer.hpp
+++ b/components/core/src/clp_s/JsonSerializer.hpp
@@ -24,8 +24,8 @@ class JsonSerializer {
         AddNullValue,
         BeginArray,
         EndArray,
-        BeginDocument,
-        BeginArrayDocument,
+        BeginUnnamedObject,
+        BeginUnnamedArray,
     };
 
     static int64_t const cReservedLength = 4096;
@@ -79,7 +79,7 @@ class JsonSerializer {
 
     void end_object() {
         if (m_op_list[m_op_list_index - 2] != BeginObject
-            && m_op_list[m_op_list_index - 2] != BeginDocument)
+            && m_op_list[m_op_list_index - 2] != BeginUnnamedObject)
         {
             m_json_string.pop_back();
         }
@@ -95,7 +95,7 @@ class JsonSerializer {
 
     void end_array() {
         if (m_op_list[m_op_list_index - 2] != BeginArray
-            && m_op_list[m_op_list_index - 2] != BeginArrayDocument)
+            && m_op_list[m_op_list_index - 2] != BeginUnnamedArray)
         {
             m_json_string.pop_back();
         }
diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index 5ca6674d8..2b63b0ee7 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -72,7 +72,7 @@ void SchemaReader::generate_json_string() {
                 m_json_serializer.end_object();
                 break;
             }
-            case JsonSerializer::Op::BeginDocument: {
+            case JsonSerializer::Op::BeginUnnamedObject: {
                 m_json_serializer.begin_document();
                 break;
             }
@@ -84,7 +84,7 @@ void SchemaReader::generate_json_string() {
                 m_json_serializer.end_array();
                 break;
             }
-            case JsonSerializer::Op::BeginArrayDocument: {
+            case JsonSerializer::Op::BeginUnnamedArray: {
                 m_json_serializer.begin_array_document();
                 break;
             }
@@ -337,12 +337,12 @@ void SchemaReader::find_intersection_and_fix_brackets(
         }
         if (NodeType::Object == node.get_type()) {
             m_json_serializer.add_op(
-                    no_name ? JsonSerializer::Op::BeginDocument : JsonSerializer::Op::BeginObject
+                    no_name ? JsonSerializer::Op::BeginUnnamedObject
+                            : JsonSerializer::Op::BeginObject
             );
         } else if (NodeType::StructuredArray == node.get_type()) {
             m_json_serializer.add_op(
-                    no_name ? JsonSerializer::Op::BeginArrayDocument
-                            : JsonSerializer::Op::BeginArray
+                    no_name ? JsonSerializer::Op::BeginUnnamedArray : JsonSerializer::Op::BeginArray
             );
         }
     }
@@ -371,7 +371,7 @@ size_t SchemaReader::generate_structured_array_template(
                                 get_first_column_in_span(sub_object_schema),
                                 NodeType::StructuredArray
                         );
-                m_json_serializer.add_op(JsonSerializer::Op::BeginArrayDocument);
+                m_json_serializer.add_op(JsonSerializer::Op::BeginUnnamedArray);
                 column_idx = generate_structured_array_template(
                         sub_array_root,
                         column_idx,
@@ -384,7 +384,7 @@ size_t SchemaReader::generate_structured_array_template(
                         get_first_column_in_span(sub_object_schema),
                         NodeType::Object
                 );
-                m_json_serializer.add_op(JsonSerializer::Op::BeginDocument);
+                m_json_serializer.add_op(JsonSerializer::Op::BeginUnnamedObject);
                 column_idx = generate_structured_object_template(
                         object_root,
                         column_idx,
@@ -408,7 +408,7 @@ size_t SchemaReader::generate_structured_array_template(
                     break;
                 }
                 case NodeType::StructuredArray: {
-                    m_json_serializer.add_op(JsonSerializer::Op::BeginArrayDocument);
+                    m_json_serializer.add_op(JsonSerializer::Op::BeginUnnamedArray);
                     m_json_serializer.add_op(JsonSerializer::Op::EndArray);
                     break;
                 }

From 9d14226031e37edb072d83344fe1c4de66c6909f Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Wed, 29 May 2024 17:24:53 +0000
Subject: [PATCH 03/11] Fix a bug where empty structured arrays do not get
 marshalled

---
 components/core/src/clp_s/ArchiveReader.cpp | 33 +++++++++++++--------
 components/core/src/clp_s/ArchiveReader.hpp |  4 +--
 components/core/src/clp_s/SchemaReader.hpp  | 14 ++++-----
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
index 084593639..cc3734a8a 100644
--- a/components/core/src/clp_s/ArchiveReader.cpp
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -143,11 +143,10 @@ BaseColumnReader* ArchiveReader::append_reader_column(SchemaReader& reader, int3
 
 void ArchiveReader::append_unordered_reader_columns(
         SchemaReader& reader,
-        NodeType unordered_object_type,
+        int32_t mst_subtree_root_node_id,
         std::span<int32_t> schema_ids,
         bool should_marshal_records
 ) {
-    int32_t mst_subtree_root_node_id = INT32_MAX;
     size_t object_begin_pos = reader.get_column_size();
     for (int32_t column_id : schema_ids) {
         if (Schema::schema_entry_is_unordered_object(column_id)) {
@@ -155,13 +154,6 @@ void ArchiveReader::append_unordered_reader_columns(
         }
         BaseColumnReader* column_reader = nullptr;
         auto const& node = m_schema_tree->get_node(column_id);
-        if (INT32_MAX == mst_subtree_root_node_id) {
-            mst_subtree_root_node_id = m_schema_tree->find_matching_subtree_root_in_subtree(
-                    -1,
-                    column_id,
-                    unordered_object_type
-            );
-        }
         switch (node.get_type()) {
             case NodeType::Integer:
                 column_reader = new Int64ColumnReader(column_id);
@@ -214,19 +206,36 @@ SchemaReader& ArchiveReader::create_schema_reader(
             should_marshal_records
     );
     auto timestamp_column_ids = m_timestamp_dict->get_authoritative_timestamp_column_ids();
-
     for (size_t i = 0; i < schema.size(); ++i) {
         int32_t column_id = schema[i];
         if (Schema::schema_entry_is_unordered_object(column_id)) {
             size_t length = Schema::get_unordered_object_length(column_id);
+
+            auto sub_schema = schema.get_view(i + 1, length);
+            auto mst_subtree_root_node_id = m_schema_tree->find_matching_subtree_root_in_subtree(
+                    -1,
+                    SchemaReader::get_first_column_in_span(sub_schema),
+                    Schema::get_unordered_object_type(column_id)
+            );
             append_unordered_reader_columns(
                     m_schema_reader,
-                    Schema::get_unordered_object_type(column_id),
-                    schema.get_view(i + 1, length),
+                    mst_subtree_root_node_id,
+                    sub_schema,
                     should_marshal_records
             );
             i += length;
             continue;
+        } else if (i >= schema.get_num_ordered()) {
+            // Length one unordered object that doesn't have a tag. This is only allowed when the
+            // column id is the root of the unordered object, so we can pass it directly to
+            // append_unordered_reader_columns.
+            append_unordered_reader_columns(
+                    m_schema_reader,
+                    column_id,
+                    schema.get_view(i, 0),
+                    should_marshal_records
+            );
+            continue;
         }
         BaseColumnReader* column_reader = append_reader_column(m_schema_reader, column_id);
 
diff --git a/components/core/src/clp_s/ArchiveReader.hpp b/components/core/src/clp_s/ArchiveReader.hpp
index 6ce881e91..54eb42698 100644
--- a/components/core/src/clp_s/ArchiveReader.hpp
+++ b/components/core/src/clp_s/ArchiveReader.hpp
@@ -149,13 +149,13 @@ class ArchiveReader {
     /**
      * Appends columns for the entire schema of an unordered object.
      * @param reader
-     * @param unordered_object_type
+     * @param mst_subtree_root_node_id
      * @param schema_ids
      * @param should_marshal_records
      */
     void append_unordered_reader_columns(
             SchemaReader& reader,
-            NodeType unordered_object_type,
+            int32_t mst_subtree_root_node_id,
             std::span<int32_t> schema_ids,
             bool should_marshal_records
     );
diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
index 1e5e6a349..1df5cbf1b 100644
--- a/components/core/src/clp_s/SchemaReader.hpp
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -178,6 +178,13 @@ class SchemaReader {
 
     int32_t get_schema_id() const { return m_schema_id; }
 
+    /**
+     * @param schema
+     * @return the first column ID found in the given schema, or -1 if the schema contains no
+     * columns
+     */
+    static int32_t get_first_column_in_span(std::span<int32_t> schema);
+
 private:
     /**
      * Merges the current local schema tree with the section of the global schema tree corresponding
@@ -211,13 +218,6 @@ class SchemaReader {
     size_t
     generate_structured_object_template(int32_t id, size_t column_start, std::span<int32_t> schema);
 
-    /**
-     * @param schema
-     * @return the first column ID found in the given schema, or -1 if the schema contains no
-     * columns
-     */
-    static inline int32_t get_first_column_in_span(std::span<int32_t> schema);
-
     void find_intersection_and_fix_brackets(
             int32_t cur_root,
             int32_t next_root,

From a9aff7ca08bcdfdec3ebf00ed4a09f4c26d9e881 Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Wed, 29 May 2024 17:48:54 +0000
Subject: [PATCH 04/11] Fix bug where find_intersection_and_fix_brackets can
 sometimes miss one level of bracket fixing

---
 components/core/src/clp_s/SchemaReader.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index 2b63b0ee7..e6f423b9f 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -328,6 +328,14 @@ void SchemaReader::find_intersection_and_fix_brackets(
         }
     }
 
+    // The loop above ends when the parent of next node and cur node matches. When these two nodes
+    // have the same parent but are different nodes we need to close the last bracket for the
+    // previous node, and add the first key for next node.
+    if (cur_node != next_node) {
+        m_json_serializer.add_op(JsonSerializer::Op::EndObject);
+        path_to_intersection.push_back(next_node->get_id());
+    }
+
     for (auto it = path_to_intersection.rbegin(); it != path_to_intersection.rend(); ++it) {
         auto const& node = m_global_schema_tree->get_node(*it);
         bool no_name = true;

From 1fe417f7aa1cd8999539568d9c9440ec755f1789 Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Wed, 29 May 2024 18:32:29 +0000
Subject: [PATCH 05/11] Improve comments in SchemaReader.hpp

---
 components/core/src/clp_s/SchemaReader.hpp | 29 ++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
index 1df5cbf1b..7c96ac57e 100644
--- a/components/core/src/clp_s/SchemaReader.hpp
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -203,8 +203,9 @@ class SchemaReader {
     /**
      * Generates a json template for a structured array
      * @param id
-     * @param column_start
+     * @param column_start the index of the first reader in m_columns belonging to this array
      * @param schema
+     * @return the index of the next reader in m_columns after those consumed by this array
      */
     size_t
     generate_structured_array_template(int32_t id, size_t column_start, std::span<int32_t> schema);
@@ -212,12 +213,36 @@ class SchemaReader {
     /**
      * Generates a json template for a structured object
      * @param id
-     * @param column_start
+     * @param column_start the index of the first reader in m_columns belonging to this object
      * @param schema
+     * @return the index of the next reader in m_columns after those consumed by this object
      */
     size_t
     generate_structured_object_template(int32_t id, size_t column_start, std::span<int32_t> schema);
 
+    /**
+     * Finds the common root of the subtree containing cur_root and next_root, and adds brackets
+     * and keys to m_json_serializer as necessary so that the json object is correct between the
+     * previous field which is a child of cur_root, and the next field which is a child of
+     * next_root.
+     *
+     * For example for the object {"a": {"b":"c"}, "d": {"e":{"f":"g"}} after appending "b" cur_root
+     * would be "a", and next_root would be "e". (since it is the parent of the next field "f").
+     * The current state of the object would look like {"a":{"b":"c" -- to prepare for "f" we would
+     * add },"d":{"e":{ or in other words close one bracket, add "d" and open bracket, add "e" and
+     * open bracket. After adding field "f" the current root is "e", and the next root is the
+     * original object which is the parent of "a" so we add }}.
+     *
+     * This works by tracing the path between both cur_root and next_root to their nearest common
+     * ancestor. For every step cur_root takes towards this common ancestor we must close a bracket,
+     * and for every step on the path from next_root a key must be added and a bracket must be
+     * opened. The parameter `path_to_intersection` is used as a buffer to store the path from
+     * next_root to this intersection so that the keys can be added to m_json_serializer in the
+     * correct order.
+     * @param cur_root
+     * @param next_root
+     * @param path_to_intersection
+     */
     void find_intersection_and_fix_brackets(
             int32_t cur_root,
             int32_t next_root,

From da90ddb89c75cdc264d73a8937459728c90177ee Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Wed, 29 May 2024 18:37:17 +0000
Subject: [PATCH 06/11] Address review comment: reuse child_global_id in
 generate_json_template

---
 components/core/src/clp_s/SchemaReader.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index e6f423b9f..ae3ad1937 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -587,13 +587,12 @@ void SchemaReader::generate_json_template(int32_t id) {
             case NodeType::StructuredArray: {
                 m_json_serializer.add_op(JsonSerializer::Op::BeginArray);
                 m_json_serializer.add_special_key(key);
-                int32_t global_child_id = m_local_id_to_global_id[child_id];
-                auto structured_it = m_global_id_to_unordered_object.find(global_child_id);
+                auto structured_it = m_global_id_to_unordered_object.find(child_global_id);
                 if (m_global_id_to_unordered_object.end() != structured_it) {
                     size_t column_start = structured_it->second.first;
                     std::span<int32_t> structured_schema = structured_it->second.second;
                     generate_structured_array_template(
-                            global_child_id,
+                            child_global_id,
                             column_start,
                             structured_schema
                     );

From 249f7f64f308c524e2de0ff5e666c6c24cb71a55 Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Wed, 29 May 2024 19:01:13 +0000
Subject: [PATCH 07/11] Update comment

---
 components/core/src/clp_s/SchemaReader.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
index 7c96ac57e..8597316a6 100644
--- a/components/core/src/clp_s/SchemaReader.hpp
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -226,11 +226,11 @@ class SchemaReader {
      * previous field which is a child of cur_root, and the next field which is a child of
      * next_root.
      *
-     * For example for the object {"a": {"b":"c"}, "d": {"e":{"f":"g"}} after appending "b" cur_root
-     * would be "a", and next_root would be "e". (since it is the parent of the next field "f").
-     * The current state of the object would look like {"a":{"b":"c" -- to prepare for "f" we would
-     * add },"d":{"e":{ or in other words close one bracket, add "d" and open bracket, add "e" and
-     * open bracket. After adding field "f" the current root is "e", and the next root is the
+     * For example for the object {"a": {"b": "c"}, "d": {"e": {"f": "g"}}} after appending "b"
+     * cur_root would be "a", and next_root would be "e" (since it is the parent of the next field
+     * "f"). The current state of the object would look like "a":{"b":"c" -- to prepare for "f" we
+     * would add },"d":{"e":{ or in other words close one bracket, add "d" and open bracket, add "e"
+     * and open bracket. After adding field "f" the current root is "e", and the next root is the
      * original object which is the parent of "a" so we add }}.
      *
      * This works by tracing the path between both cur_root and next_root to their nearest common

From cf4227029b2e1fcc63bfa8b920fb0bcd777e6d00 Mon Sep 17 00:00:00 2001
From: Devin Gibson <gibber9809@users.noreply.github.com>
Date: Mon, 3 Jun 2024 14:32:19 -0400
Subject: [PATCH 08/11] Update components/core/src/clp_s/SchemaReader.cpp

Co-authored-by: wraymo <37269683+wraymo@users.noreply.github.com>
---
 components/core/src/clp_s/SchemaReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index ae3ad1937..03edebf69 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -407,7 +407,7 @@ size_t SchemaReader::generate_structured_array_template(
                 case NodeType::Object: {
                     find_intersection_and_fix_brackets(
                             array_root,
-                            node.get_id(),
+                            global_column_id,
                             path_to_intersection
                     );
                     for (int j = 0; j < (node.get_depth() - depth); ++j) {

From e8235bbc59588ef1541c47b8de1265f164a9c1fb Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Thu, 6 Jun 2024 16:30:28 +0000
Subject: [PATCH 09/11] Address review comment

---
 components/core/src/clp_s/ArchiveReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
index cc3734a8a..6219f4b42 100644
--- a/components/core/src/clp_s/ArchiveReader.cpp
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -232,7 +232,7 @@ SchemaReader& ArchiveReader::create_schema_reader(
             append_unordered_reader_columns(
                     m_schema_reader,
                     column_id,
-                    schema.get_view(i, 0),
+                    std::span<int32_t>(),
                     should_marshal_records
             );
             continue;

From 2e0b6dbbf4b6b1eed3195555f0b4668a1ff7955d Mon Sep 17 00:00:00 2001
From: Devin Gibson <gibber9809@users.noreply.github.com>
Date: Thu, 6 Jun 2024 14:42:57 -0400
Subject: [PATCH 10/11] Update components/core/src/clp_s/ArchiveReader.cpp

Co-authored-by: wraymo <37269683+wraymo@users.noreply.github.com>
---
 components/core/src/clp_s/ArchiveReader.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
index 6219f4b42..29e17080a 100644
--- a/components/core/src/clp_s/ArchiveReader.cpp
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -225,7 +225,8 @@ SchemaReader& ArchiveReader::create_schema_reader(
             );
             i += length;
             continue;
-        } else if (i >= schema.get_num_ordered()) {
+        } 
+        if (i >= schema.get_num_ordered()) {
             // Length one unordered object that doesn't have a tag. This is only allowed when the
             // column id is the root of the unordered object, so we can pass it directly to
             // append_unordered_reader_columns.

From e8d82188268a5866cf488e025f5b45ab33b44dc7 Mon Sep 17 00:00:00 2001
From: gibber9809 <devinbook1@gmail.com>
Date: Thu, 6 Jun 2024 21:12:38 +0000
Subject: [PATCH 11/11] Fix lint

---
 components/core/src/clp_s/ArchiveReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
index 29e17080a..93f905e3b 100644
--- a/components/core/src/clp_s/ArchiveReader.cpp
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -225,7 +225,7 @@ SchemaReader& ArchiveReader::create_schema_reader(
             );
             i += length;
             continue;
-        } 
+        }
         if (i >= schema.get_num_ordered()) {
             // Length one unordered object that doesn't have a tag. This is only allowed when the
             // column id is the root of the unordered object, so we can pass it directly to