diff --git a/Makefile b/Makefile index 11154f6cf9b..4ec2b959928 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ -SUBDIRS = src test -test_DEPS = src +SUBDIRS = src test convert +convert_DEPS = src +test_DEPS = src convert PASSIVE_SUBDIRS = doc/ref_cpp/examples doc_ref_cpp_examples_DEPS = src diff --git a/convert/.gitignore b/convert/.gitignore new file mode 100644 index 00000000000..7054821fdba --- /dev/null +++ b/convert/.gitignore @@ -0,0 +1,13 @@ +/*.d +/*.o + +# Coverage data +/*.gcno +/*.gcda + +/tightdb-convert +/tightdb-convert-noinst +/tightdb-convert-dbg +/tightdb-convert-dbg-noinst +/tightdb-convert-cov +/tightdb-convert-cov-noinst diff --git a/convert/Makefile b/convert/Makefile new file mode 100644 index 00000000000..2b34a9cc0bb --- /dev/null +++ b/convert/Makefile @@ -0,0 +1,6 @@ +bin_PROGRAMS = tightdb-convert + +tightdb_convert_SOURCES = main.cpp +tightdb_convert_LIBS = ../src/tightdb/libtightdb.a + +include ../src/generic.mk diff --git a/convert/main.cpp b/convert/main.cpp new file mode 100644 index 00000000000..151f6aec920 --- /dev/null +++ b/convert/main.cpp @@ -0,0 +1,698 @@ +#include +#include +#include + +#include +#include + +using namespace std; +using namespace tightdb; + +/* + +NB: Currently, no conversion is done! This code just shows how to go +about handling conversion when it becomes necessary. + +The main idea is to allow the incoming file format version to be +older than the one supported by the current version of the core +library. + +This is handled by accessing the incoming database in a low-level way, +where version differences can be incorporated as alternative +branches. The new copy is built using the high-level API which will +ensure that the new copy uses the current format. + +Testing: + +To be able to test this, we need a repository of database files using +older file format versions.
Each file must contain data that expresses +all important variations of the file format: Tables of various size +such that there is at least a 0, 1, and 2 level B+-tree. Tables with all +column types, including string enumerations. Strings and binary data +of various sizes to trigger each leaf type. + +*/ + + +// FIXME: Command line switch to optimize output group + +namespace { +/* Pairs an array accessor (m_array) with m_must_destroy; the destructor calls m_array.destroy() only when that flag has been set. */ +template class Wrap { +public: + A m_array; + bool m_must_destroy; + Wrap(Allocator& alloc): m_array(alloc), m_must_destroy(false) {} + ~Wrap() { if (m_must_destroy) m_array.destroy(); } + bool empty() const { return m_array.is_empty(); } + size_t size() const { return m_array.size(); } + size_t get_as_ref(size_t i) const { return m_array.get_as_ref(i); } +}; + +/* Reads the incoming database through low-level array accessors, starting at top_ref, and rebuilds its contents in `group` through the Group/Table API. */ +class Converter { +public: + Converter(SlabAlloc& alloc, ref_type top_ref, int version, Group& group): + m_alloc(alloc), m_top_ref(top_ref), m_version(version), m_new_group(group) {} + + void convert() + { + convert_group(m_top_ref, m_new_group); + } + +private: + SlabAlloc& m_alloc; + ref_type m_top_ref; + int m_version; + Group &m_new_group; + /* Slot 0 of the group top array is read as the table names and slot 1 as the table refs; each table is converted into the table of the same name obtained from new_group.get_table(). */ + void convert_group(ref_type ref, Group& new_group) + { + Wrap top(m_alloc); + init(top, ref); + Wrap table_names(m_alloc); + Wrap table_refs(m_alloc); + init(table_names, top.get_as_ref(0)); + init(table_refs, top.get_as_ref(1)); + size_t n = table_refs.size(); + for (size_t i = 0; i != n; ++i) { + StringData name = table_names.m_array.get(i); + cout << "Converting table: '" << name << "'\n"; + TableRef new_table = new_group.get_table(name); + convert_table_and_spec(table_refs.m_array.get(i), *new_table); + } + } + /* Slot 0 of the table top array is the spec and slot 1 the columns. The spec is converted first because convert_columns() takes its column count from new_table. */ + void convert_table_and_spec(ref_type ref, Table& new_table) + { + Wrap top(m_alloc); + init(top, ref); + DescriptorRef new_desc = new_table.get_descriptor(); + convert_spec(top.get_as_ref(0), *new_desc); + convert_columns(top.get_as_ref(0), top.get_as_ref(1), new_table); + } + /* Adds to new_desc one column per entry of the spec at `ref`, recursing into a subspec for every subtable column. String-enum columns are added as plain string columns. */ + void convert_spec(ref_type ref, Descriptor& new_desc) + { + Wrap top(m_alloc); + init(top, ref);
+ if (top.size() < 3 || 5 < top.size()) + throw runtime_error("Unexpected size of spec top array"); /* slots, as also read by convert_columns(): 0 types, 1 names, 2 attributes, 3 subspecs (optional), 4 enum keys (optional) */ + Wrap column_types(m_alloc); + Wrap column_names(m_alloc); + Wrap column_subspecs(m_alloc); + init(column_types, top.get_as_ref(0)); + init(column_names, top.get_as_ref(1)); + if (3 < top.size()) /* slot 3 exists only when the array has more than three slots */ + init(column_subspecs, top.get_as_ref(3)); + size_t name_ndx = 0; + size_t subspec_ndx = 0; + size_t n = column_types.size(); + for (size_t i = 0; i != n; ++i) { + ColumnType type = ColumnType(column_types.m_array.get(i)); + DataType new_type = DataType(); + switch (type) { + case col_type_Int: + case col_type_Bool: + case col_type_DateTime: + case col_type_Float: + case col_type_Double: + case col_type_String: + case col_type_Binary: + case col_type_Table: + case col_type_Mixed: + new_type = DataType(type); + break; + case col_type_StringEnum: + new_type = type_String; + break; + case col_type_Reserved1: + case col_type_Reserved4: + throw runtime_error("Unexpected column type"); + } + StringData name = column_names.m_array.get(name_ndx); + ++name_ndx; + cout << "col name: " << name << "\n"; + DescriptorRef subdesc; + new_desc.add_column(new_type, name, &subdesc); + if (new_type == type_Table) { + convert_spec(column_subspecs.get_as_ref(subspec_ndx), *subdesc); /* subspecs are consumed in order, one per subtable column */ + ++subspec_ndx; + } + } + } + /* Copies the data of every column of the old table (spec at spec_ref, column refs at columns_ref) into new_table, whose columns must already exist. */ + void convert_columns(ref_type spec_ref, ref_type columns_ref, Table& new_table) + { + Wrap column_types(m_alloc); + Wrap column_names(m_alloc); + Wrap column_attribs(m_alloc); + Wrap column_subspecs(m_alloc); + Wrap column_enumkeys(m_alloc); + Wrap column_refs(m_alloc); + { + Wrap spec(m_alloc); + init(spec, spec_ref); + init(column_types, spec.get_as_ref(0)); + init(column_names, spec.get_as_ref(1)); + init(column_attribs, spec.get_as_ref(2)); + if (3 < spec.size()) + init(column_subspecs, spec.get_as_ref(3)); + if (4 < spec.size()) + init(column_enumkeys, spec.get_as_ref(4)); + } + init(column_refs, columns_ref); + + size_t num_cols = new_table.get_column_count(); + + //
Determine number of rows + size_t num_rows = 0; + if (0 < num_cols) { + ref_type ref = column_refs.m_array.front(); + ColumnType type = ColumnType(column_types.m_array.front()); + if (type == col_type_Mixed) { + Array top(m_alloc); + top.init_from_ref(ref); + ref = top.get_as_ref(0); + type = col_type_Int; + } + MemRef mem(ref, m_alloc); + bool is_inner_node = Array::get_is_inner_bptree_node_from_header(mem.m_addr); /* same test as foreach_bptree_leaf(); the has-refs flag cannot be used because leaves may carry it too (see the long-string check below) */ + if (is_inner_node) { + Wrap inner_node(m_alloc); + init(inner_node, mem); + if (inner_node.size() < 3) + throw runtime_error("Too few elements in inner B+-tree node"); + int_fast64_t v = inner_node.m_array.back(); + if (v % 2 == 0) + throw runtime_error("Unexpected ref at back of inner B+-tree node"); + num_rows = to_ref(v / 2); /* the last slot must be odd; it is decoded here as 2*count + 1 */ + } + else { + switch (type) { + case col_type_Int: + case col_type_Bool: + case col_type_DateTime: + case col_type_StringEnum: + case col_type_Table: { + Array leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + case col_type_Float: { + ArrayFloat leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + case col_type_Double: { + ArrayDouble leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + case col_type_String: { + bool long_strings = Array::get_hasrefs_from_header(mem.m_addr); + if (!long_strings) { + // Small strings + ArrayString leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + bool is_big = Array::get_context_flag_from_header(mem.m_addr); + if (!is_big) { + // Medium strings + ArrayStringLong leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + // Big strings + ArrayBigBlobs leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + case col_type_Binary: { + bool is_big = Array::get_context_flag_from_header(mem.m_addr); + if (!is_big) { + // Small blobs + ArrayBinary leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + // Big
blobs + ArrayBigBlobs leaf(m_alloc); + leaf.init_from_mem(mem); + num_rows = leaf.size(); + break; + } + case col_type_Mixed: + case col_type_Reserved1: + case col_type_Reserved4: + throw runtime_error("Unexpected column type"); + } + } + } + /* Create all rows up front; the per-column converters below then fill in the values. */ + new_table.add_empty_row(num_rows); + /* column_refs, column_subspecs and column_enumkeys each have their own cursor, separate from the column index i. */ + size_t column_ref_ndx = 0; + size_t column_subspec_ndx = 0; + size_t column_enumkeys_ndx = 0; + for (size_t i = 0; i != num_cols; ++i) { + ref_type column_ref = column_refs.m_array.get(column_ref_ndx); + ++column_ref_ndx; + ColumnType type = ColumnType(column_types.m_array.get(i)); + switch (type) { + case col_type_Int: + convert_int_column(column_ref, new_table, i); + break; + case col_type_Bool: + convert_bool_column(column_ref, new_table, i); + break; + case col_type_DateTime: + convert_datetime_column(column_ref, new_table, i); + break; + case col_type_Float: + convert_float_column(column_ref, new_table, i); + break; + case col_type_Double: + convert_double_column(column_ref, new_table, i); + break; + case col_type_String: + convert_string_column(column_ref, new_table, i); + break; + case col_type_StringEnum: { + ref_type strings_ref = column_enumkeys.m_array.get(column_enumkeys_ndx); + ++column_enumkeys_ndx; + convert_string_enum_column(strings_ref, column_ref, new_table, i); + break; + } + case col_type_Binary: + convert_binary_column(column_ref, new_table, i); + break; + case col_type_Table: { + ref_type subspec_ref = column_subspecs.get_as_ref(column_subspec_ndx); + ++column_subspec_ndx; + convert_subtable_column(subspec_ref, column_ref, new_table, i); + break; + } + case col_type_Mixed: + convert_mixed_column(column_ref, new_table, i); + break; + case col_type_Reserved1: + case col_type_Reserved4: + throw runtime_error("Unexpected column type"); + } + ColumnAttr attr = ColumnAttr(column_attribs.m_array.get(i)); + switch (attr) { + case col_attr_None: + break; + case col_attr_Indexed: + ++column_ref_ndx; /* skips one extra entry of column_refs - presumably the old search index ref (TODO confirm); set_index() is called on the new column instead */ + new_table.set_index(i); + break; + case col_attr_Unique: + case
col_attr_Sorted: + throw runtime_error("Unexpected column attribute"); + } + } + } + + void convert_int_column(ref_type ref, Table& new_table, size_t col_ndx) + { + cout << "column_ref = " << ref << "\n"; + Column col(ref, 0, 0, m_alloc); + size_t n = col.size(); + if (n != new_table.size()) + throw runtime_error("Unexpected column size"); + for (size_t i=0; i class IntegerLeafHandler { /* Leaf visitor for foreach_bptree_leaf(): opens the leaf as an Array and passes every element, in order, to the element handler as an int_fast64_t. */ + public: + IntegerLeafHandler(const Converter& conv, ElemHandler& elem_handler): + m_conv(conv), + m_elem_handler(elem_handler) + { + } + void operator()(MemRef mem) + { + Array leaf(m_conv.m_alloc); + leaf.init_from_mem(mem); + size_t n = leaf.size(); + for (size_t i = 0; i != n; ++i) { + int_fast64_t value = leaf.get(i); + m_elem_handler(value); + } + } + private: + const Converter& m_conv; + ElemHandler& m_elem_handler; + }; + + /* Leaf visitor for string leaves: picks the leaf accessor from the header flags (no refs: ArrayString; refs without context flag: ArrayStringLong; refs with context flag: ArrayBigBlobs) and passes every string, in order, to the element handler. */ + template class StringLeafHandler { + public: + StringLeafHandler(const Converter& conv, ElemHandler& elem_handler): + m_conv(conv), + m_elem_handler(elem_handler) + { + } + void operator()(MemRef mem) + { + bool long_strings = Array::get_hasrefs_from_header(mem.m_addr); + if (!long_strings) { + // Small strings + ArrayString leaf(m_conv.m_alloc); + leaf.init_from_mem(mem); + size_t n = leaf.size(); + for (size_t i = 0; i != n; ++i) { + StringData str = leaf.get(i); + m_elem_handler(str); + } + return; + } + bool is_big = Array::get_context_flag_from_header(mem.m_addr); + if (!is_big) { + // Medium strings + ArrayStringLong leaf(m_conv.m_alloc); + leaf.init_from_mem(mem); + size_t n = leaf.size(); + for (size_t i = 0; i != n; ++i) { + StringData str = leaf.get(i); + m_elem_handler(str); + } + return; + } + // Big strings + ArrayBigBlobs leaf(m_conv.m_alloc); + leaf.init_from_mem(mem); + size_t n = leaf.size(); + for (size_t i = 0; i != n; ++i) { + StringData str = leaf.get_string(i); + m_elem_handler(str); + } + } + private: + const Converter& m_conv; + ElemHandler& m_elem_handler; + }; + + /* Element handler that writes each string it receives into one column of a table, at consecutive rows starting from row 0. */ + class StringSetter { + public: + StringSetter(Table& table,
size_t col_ndx): + m_table(table), + m_col_ndx(col_ndx), + m_row_ndx(0) + { + } + void operator()(StringData str) + { + m_table.set_string(m_col_ndx, m_row_ndx, str); + ++m_row_ndx; + } + private: + Table& m_table; + const size_t m_col_ndx; + size_t m_row_ndx; + }; + /* Copies a string column: every string found in the leaves under `ref` is written, in order, to column col_ndx of new_table. */ + void convert_string_column(ref_type ref, Table& new_table, size_t col_ndx) + { + cout << "string_column_ref = " << ref << "\n"; + StringSetter elem_handler(new_table, col_ndx); + StringLeafHandler leaf_handler(*this, elem_handler); + foreach_bptree_leaf(ref, leaf_handler); + } + + /* Element handler that appends every string it receives to the vector given at construction. */ + class StringCollector { + public: + StringCollector(vector& strings): + m_strings(strings) + { + } + void operator()(StringData str) + { + m_strings.push_back(str); + } + private: + vector& m_strings; + }; + /* Element handler for string-enum index values: looks each value up in `strings` (no bounds check) and stores the result through StringSetter. */ + class StringEnumSetter: StringSetter { + public: + StringEnumSetter(Table& table, size_t col_ndx, const vector& strings): + StringSetter(table, col_ndx), + m_strings(strings) + { + } + void operator()(int_fast64_t index) + { + StringData str = m_strings[index]; + StringSetter::operator()(str); + } + private: + const vector& m_strings; + }; + /* Copies a string-enum column in two passes: first collect the key strings stored under strings_ref, then walk the integer leaves under indexes_ref and write the key string selected by each value to column col_ndx of new_table. */ + void convert_string_enum_column(ref_type strings_ref, ref_type indexes_ref, Table& new_table, size_t col_ndx) + { + cout << "string_enum_column_strings_ref = " << strings_ref << "\n"; + cout << "string_enum_column_indexes_ref = " << indexes_ref << "\n"; + vector strings; + { + StringCollector elem_handler(strings); + StringLeafHandler leaf_handler(*this, elem_handler); + foreach_bptree_leaf(strings_ref, leaf_handler); + } + { + StringEnumSetter elem_handler(new_table, col_ndx, strings); + IntegerLeafHandler leaf_handler(*this, elem_handler); + foreach_bptree_leaf(indexes_ref, leaf_handler); + } + } + + + void convert_binary_column(ref_type ref, Table& new_table, size_t col_ndx) + { + cout << "binary_column_ref = " << ref << "\n"; + ColumnBinary col(ref, 0, 0, m_alloc); + size_t n = col.size(); + if (n != new_table.size()) + throw runtime_error("Unexpected column size"); + for
(size_t i=0; i void init(Wrap& array, MemRef mem) + { + if (init(array.m_array, mem)) + array.m_must_destroy = true; + } + /* Convenience overload: resolves `ref` through m_alloc and forwards to the MemRef overload. */ + template void init(Wrap& array, ref_type ref) + { + MemRef mem(ref, m_alloc); + init(array, mem); + } + /* Attaches `array` to `mem`. Returns true only if a converted copy was allocated (in which case the Wrap must destroy it); currently no conversion is done, so it always returns false. */ + bool init(Array& array, MemRef mem) + { + // If conversion of the array is needed (a decision which may + // be based on m_version) then that conversion should be done + // here. When converting, allocate space for a new array, and + // return true. + array.init_from_mem(mem); + return false; + } + /* Calls handler(mem) for every leaf of the B+-tree rooted at `ref`, in slot order. In an inner node only slots 1 .. size-2 are followed as child refs; the first and last slots are skipped. */ + template void foreach_bptree_leaf(ref_type ref, H& handler) + { + MemRef mem(ref, m_alloc); + if (!Array::get_is_inner_bptree_node_from_header(mem.m_addr)) { + handler(mem); + return; + } + + Wrap inner_node(m_alloc); + init(inner_node, mem); + if (inner_node.size() < 3) + throw runtime_error("Too few elements in inner B+-tree node"); + size_t n = inner_node.size() - 2; + for (size_t i = 0; i != n; ++i) + foreach_bptree_leaf(inner_node.get_as_ref(1 + i), handler); + } +}; + +} // Anonymous namespace + + +int main(int argc, char* argv[]) +{ + // Process command line + { + const char* prog = argv[0]; + --argc; + ++argv; + bool error = false; + bool help = false; + int argc2 = 0; + for (int i=0; i(file_header[16 + 4 + valid_part]); - if (version != current_file_format_version) - return false; // unsupported version + if (get_version) { + *get_version = version; + } + else { + if (version != current_file_format_version) + return false; // unsupported version + } // Top_ref should always point within buffer const uint64_t* top_refs = reinterpret_cast(data); diff --git a/src/tightdb/alloc_slab.hpp b/src/tightdb/alloc_slab.hpp index 58d15b46184..dd304c24c5c 100644 --- a/src/tightdb/alloc_slab.hpp +++ b/src/tightdb/alloc_slab.hpp @@ -81,16 +81,21 @@ class SlabAlloc: public Allocator { /// /// \param no_create Fail if the file does not already exist. /// - /// \param bool skip_validate Skip validation of file header.
In a - /// set of overlapping SharedGroups, only the first one (the one - /// that creates/initlializes the coordination file) may validate - /// the header, otherwise it will result in a race condition. + /// \param skip_validate Skip validation of file header. In a set + /// of overlapping SharedGroups, only the first one (the one that + /// creates/initializes the coordination file) may validate the + /// header, otherwise it will result in a race condition. + /// + /// \param get_version If non-null and `skip_validate` is not + /// true, store the file format version number into the integer it + /// points to. If null, validation fails unless the file format + /// version is exactly as expected. /// /// \return The `ref` of the root node, or zero if there is none. /// /// \throw util::File::AccessError ref_type attach_file(const std::string& path, bool is_shared, bool read_only, bool no_create, - bool skip_validate); + bool skip_validate, int* get_version = 0); /// Attach this allocator to the specified memory buffer. /// @@ -102,7 +107,7 @@ class SlabAlloc: public Allocator { /// \sa own_buffer() /// /// \throw InvalidDatabase - ref_type attach_buffer(char* data, std::size_t size); + ref_type attach_buffer(char* data, std::size_t size, int* get_version = 0); /// Attach this allocator to an empty buffer. /// @@ -305,7 +310,7 @@ class SlabAlloc: public Allocator { /// Throws if free-lists are no longer valid. const FreeSpace& get_free_read_only() const; - bool validate_buffer(const char* data, std::size_t len, ref_type& top_ref); + bool validate_buffer(const char* data, std::size_t len, ref_type& top_ref, int* get_version); void do_prepare_for_update(char* mutable_data);