-
Notifications
You must be signed in to change notification settings - Fork 169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use BPlusTree to hold backlinks #6673
Changes from 5 commits
2864d8a
1704ded
8621ce6
f1d3e68
6dbc250
333bb68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -19,6 +19,7 @@ | |||||||||||||||||||||||||||||||
#include <realm/bplustree.hpp> | ||||||||||||||||||||||||||||||||
#include <realm/impl/destroy_guard.hpp> | ||||||||||||||||||||||||||||||||
#include <realm/array_unsigned.hpp> | ||||||||||||||||||||||||||||||||
#include <realm/array_integer.hpp> | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
using namespace realm; | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
|
@@ -130,6 +131,8 @@ class BPlusTreeInner : public BPlusTreeNode, private Array { | |||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
void ensure_offsets(); | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
std::unique_ptr<BPlusTreeInner> split_root(); | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
private: | ||||||||||||||||||||||||||||||||
ArrayUnsigned m_offsets; | ||||||||||||||||||||||||||||||||
size_t m_my_offset; | ||||||||||||||||||||||||||||||||
|
@@ -221,6 +224,37 @@ bool BPlusTreeLeaf::bptree_traverse(TraverseFunc func) | |||||||||||||||||||||||||||||||
return func(this, 0) == IteratorControl::Stop; | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
template <> | ||||||||||||||||||||||||||||||||
void BPlusTree<int64_t>::split_root() | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
if (m_root->is_leaf()) { | ||||||||||||||||||||||||||||||||
auto sz = m_root->get_node_size(); | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
LeafNode* leaf = static_cast<LeafNode*>(m_root.get()); | ||||||||||||||||||||||||||||||||
auto new_root = std::make_unique<BPlusTreeInner>(this); | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
new_root->create(REALM_MAX_BPNODE_SIZE); | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
size_t ndx = 0; | ||||||||||||||||||||||||||||||||
while (ndx < sz) { | ||||||||||||||||||||||||||||||||
LeafNode new_leaf(this); | ||||||||||||||||||||||||||||||||
new_leaf.create(); | ||||||||||||||||||||||||||||||||
size_t to_move = std::min(size_t(REALM_MAX_BPNODE_SIZE), sz - ndx); | ||||||||||||||||||||||||||||||||
for (size_t i = 0; i < to_move; i++, ndx++) { | ||||||||||||||||||||||||||||||||
new_leaf.insert(i, leaf->get(ndx)); | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
new_root->add_bp_node_ref(new_leaf.get_ref()); // Throws | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
new_root->append_tree_size(sz); | ||||||||||||||||||||||||||||||||
leaf->destroy(); | ||||||||||||||||||||||||||||||||
replace_root(std::move(new_root)); | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
else { | ||||||||||||||||||||||||||||||||
BPlusTreeInner* inner = static_cast<BPlusTreeInner*>(m_root.get()); | ||||||||||||||||||||||||||||||||
replace_root(inner->split_root()); | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
/****************************** BPlusTreeInner *******************************/ | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
BPlusTreeInner::BPlusTreeInner(BPlusTreeBase* tree) | ||||||||||||||||||||||||||||||||
|
@@ -514,6 +548,36 @@ void BPlusTreeInner::ensure_offsets() | |||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
std::unique_ptr<BPlusTreeInner> BPlusTreeInner::split_root() | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
auto new_root = std::make_unique<BPlusTreeInner>(m_tree); | ||||||||||||||||||||||||||||||||
auto sz = get_node_size(); | ||||||||||||||||||||||||||||||||
size_t elems_per_child = get_elems_per_child(); | ||||||||||||||||||||||||||||||||
new_root->create(REALM_MAX_BPNODE_SIZE * elems_per_child); | ||||||||||||||||||||||||||||||||
size_t ndx = 0; | ||||||||||||||||||||||||||||||||
size_t tree_size = get_tree_size(); | ||||||||||||||||||||||||||||||||
size_t accumulated_size = 0; | ||||||||||||||||||||||||||||||||
while (ndx < sz) { | ||||||||||||||||||||||||||||||||
BPlusTreeInner new_inner(m_tree); | ||||||||||||||||||||||||||||||||
size_t to_move = std::min(size_t(REALM_MAX_BPNODE_SIZE), sz - ndx); | ||||||||||||||||||||||||||||||||
new_inner.create(elems_per_child); | ||||||||||||||||||||||||||||||||
for (size_t i = 0; i < to_move; i++, ndx++) { | ||||||||||||||||||||||||||||||||
new_inner.add_bp_node_ref(get_bp_node_ref(ndx)); | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
size_t this_size = to_move * elems_per_child; | ||||||||||||||||||||||||||||||||
if (accumulated_size + this_size > tree_size) { | ||||||||||||||||||||||||||||||||
this_size = tree_size - accumulated_size; | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
accumulated_size += this_size; | ||||||||||||||||||||||||||||||||
new_inner.append_tree_size(this_size); | ||||||||||||||||||||||||||||||||
new_root->add_bp_node_ref(new_inner.get_ref()); // Throws | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
REALM_ASSERT(accumulated_size == tree_size); | ||||||||||||||||||||||||||||||||
new_root->append_tree_size(tree_size); | ||||||||||||||||||||||||||||||||
destroy(); | ||||||||||||||||||||||||||||||||
return new_root; | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
inline BPlusTreeLeaf* BPlusTreeInner::cache_leaf(MemRef mem, size_t ndx, size_t offset) | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
BPlusTreeLeaf* leaf = m_tree->cache_leaf(mem); | ||||||||||||||||||||||||||||||||
|
@@ -769,3 +833,14 @@ std::unique_ptr<BPlusTreeNode> BPlusTreeBase::create_root_from_ref(ref_type ref) | |||||||||||||||||||||||||||||||
return new_root; | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
size_t BPlusTreeBase::size_from_header(const char* header) | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
auto node_size = Array::get_size_from_header(header); | ||||||||||||||||||||||||||||||||
if (Array::get_is_inner_bptree_node_from_header(header)) { | ||||||||||||||||||||||||||||||||
auto data = Array::get_data_from_header(header); | ||||||||||||||||||||||||||||||||
auto width = Array::get_width_from_header(header); | ||||||||||||||||||||||||||||||||
node_size = size_t(get_direct(data, width, node_size - 1)) >> 1; | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
return node_size; | ||||||||||||||||||||||||||||||||
Comment on lines
+839
to
+845
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, double checking because my memory of this is fuzzy, is the divide by two always correct for an inner node? Or is it possible that it is not in the compact form. Eg should we divide by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is more a shift right than a divide by two. The size of the sub tree is always stored as the last entry and coded as a number - that is shifted one up and or-ed with 1. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not blocking, but you may want to use this suggestion to avoid an unnecessary read from the array header if looking at an inner node. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just don't think your suggesting would compile :-) |
||||||||||||||||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,6 +164,8 @@ class BPlusTreeBase { | |
return m_size; | ||
} | ||
|
||
static size_t size_from_header(const char* header); | ||
|
||
bool is_empty() const | ||
{ | ||
return m_size == 0; | ||
|
@@ -188,16 +190,12 @@ class BPlusTreeBase { | |
|
||
bool init_from_parent() | ||
{ | ||
ref_type ref = m_parent->get_child_ref(m_ndx_in_parent); | ||
if (!ref) { | ||
return false; | ||
; | ||
if (ref_type ref = m_parent->get_child_ref(m_ndx_in_parent)) { | ||
init_from_ref(ref); | ||
return true; | ||
} | ||
auto new_root = create_root_from_ref(ref); | ||
new_root->bp_set_parent(m_parent, m_ndx_in_parent); | ||
m_root = std::move(new_root); | ||
invalidate_leaf_cache(); | ||
m_size = m_root->get_tree_size(); | ||
return true; | ||
return false; | ||
} | ||
|
||
void set_parent(ArrayParent* parent, size_t ndx_in_parent) | ||
|
@@ -568,6 +566,13 @@ class BPlusTree : public BPlusTreeBase { | |
m_root->bptree_traverse(func); | ||
} | ||
|
||
void split_if_needed() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this needed? I am under the impression that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the current node is just a plain array, then it should be split into a BPlusTree. I am open for another name - I struggled a bit myself to find one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is an optimization that defers creating a tree until we reach more than 1000 links and then does the transformation on the fly rather than in the migration function of the file format. If that is correct, we should keep current performance the same for fewer than 1000 backlinks? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the size is below 1000, then a BPlusTree is just a plain array, so no need for any transformation. But the BPlusTree implementation is just slower than a plain array. I did not want to create two code paths - one for sizes under 1000 and one for sizes over. |
||
{ | ||
while (m_root->get_node_size() > REALM_MAX_BPNODE_SIZE) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please test more than one iteration of this loop eg. more than 1000 * 1000 elements. I think the second iteration will fail because There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is tested more thoroughly when the node size is 4. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, you had the implementation correct all along and I misread it. Thanks for adding the extra tests though! |
||
split_root(); | ||
} | ||
} | ||
|
||
protected: | ||
LeafNode m_leaf_cache; | ||
|
||
|
@@ -606,6 +611,8 @@ class BPlusTree : public BPlusTreeBase { | |
|
||
template <class R> | ||
friend R bptree_sum(const BPlusTree<T>& tree); | ||
|
||
void split_root(); | ||
}; | ||
|
||
template <class T> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -431,4 +431,23 @@ TEST(BPlusTree_LeafCache) | |
tree.destroy(); | ||
} | ||
|
||
TEST(BPlusTree_UpgradeFromArray) | ||
{ | ||
Array arr(Allocator::get_default()); | ||
arr.create(Node::type_Normal, false, 0, 0); | ||
|
||
for (int i = 0; i < 65; i++) { | ||
arr.add(i); | ||
} | ||
|
||
BPlusTree<Int> tree(Allocator::get_default()); | ||
tree.init_from_ref(arr.get_ref()); | ||
tree.split_if_needed(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this doesn't do a split because there are less than 1000 elements, can you add more tests like this checking various sizes: 999, 1000, 1001, 1000*1000 + 1 etc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added more sizes - but the test will still only be effective when node size is 4. |
||
|
||
tree.add(100); | ||
tree.verify(); | ||
|
||
tree.destroy(); | ||
} | ||
|
||
#endif // TEST_BPLUS_TREE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this going to interpret the data correctly if it is a simple Array that hasn't been split yet?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. A BPlusTree only holding one leaf array, is just a leaf array.