Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support more memory ext #6

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bplustree/ext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
37 changes: 37 additions & 0 deletions bplustree/ext/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-

from typing import Union, Tuple, Optional
from bplustree.memory import BaseMemory, ReachedEndOfFile
from io import BytesIO


class RAMMemory(BaseMemory):
    """Page backend that keeps every page in RAM instead of on disk.

    Each written page lives in its own ``BytesIO`` buffer stored in
    ``self.pages`` at the index equal to its page number. Slots for page
    numbers that have never been written hold ``None``.
    """

    # Starting value of the page counter exposed to users of the backend.
    last_page = 1
    tree_conf = None

    def __init__(self, tree_conf, data=''):
        """Create an empty in-memory page store.

        :param tree_conf: tree configuration, stored on the instance.
        :param data: accepted for interface compatibility; currently unused.
        """
        self.tree_conf = tree_conf
        # Must be per instance: a class-level list would be shared by every
        # RAMMemory object and leak pages from one tree into another.
        self.pages = []

    def close(self):
        """Close the backend.

        There is no external resource to release, so this is a no-op; the
        pages stay readable on the instance.
        """

    def write_page_in_tree(self, page: int, data: Union[bytes, bytearray],
                           fsync: bool = True):
        """Store ``data`` as the content of page number ``page``.

        :param page: zero-based page number; must not be negative.
        :param data: full content of the page.
        :param fsync: ignored, nothing is written to disk.
        :return: the ``BytesIO`` buffer now holding the page.
        :raises ValueError: if ``page`` is negative.
        """
        if page < 0:
            # A negative index would silently alias a page counted from
            # the end of the list.
            raise ValueError('Page number cannot be negative')

        # Grow the list so that index == page number even when pages are
        # written out of order (page numbers may skip ahead).
        missing = page + 1 - len(self.pages)
        if missing > 0:
            self.pages.extend([None] * missing)

        buffer = self.pages[page]
        if buffer is None:
            buffer = BytesIO()
            self.pages[page] = buffer
        else:
            # Drop the old content so a shorter write leaves no stale tail.
            buffer.seek(0)
            buffer.truncate()
        buffer.write(data)
        return buffer

    def read_page(self, page: int) -> bytes:
        """Return the content of page number ``page``.

        :raises ReachedEndOfFile: if the page has never been written.
        """
        if 0 <= page < len(self.pages):
            buffer = self.pages[page]
            if buffer is not None:
                return buffer.getvalue()
        raise ReachedEndOfFile

    def open_or_init(self):
        """Prepare the backend for use; nothing to do for RAM storage."""
216 changes: 216 additions & 0 deletions bplustree/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,3 +492,219 @@ def rollback(self):

def __repr__(self):
return '<WAL: {}>'.format(self.filename)


class BaseMemory(object):
    """Interface of a page-storage backend.

    Every method raises ``NotImplementedError`` here; a concrete backend
    overrides all of them.
    """

    # Page counter exposed by the backend; 0 unless a subclass changes it.
    last_page = 0

    def open_or_init(self):
        """Prepare the backend for use."""
        raise NotImplementedError()

    def read_page(self, page: int) -> bytes:
        """Return the raw content of page number ``page``."""
        raise NotImplementedError()

    def write_page_in_tree(
        self,
        page: int,
        data: Union[bytes, bytearray],
        fsync: bool = True,
    ):
        """Store ``data`` as the content of page number ``page``."""
        raise NotImplementedError()

    def close(self):
        """Close the backend."""
        raise NotImplementedError()


class CommonMemory(object):
    """Node-level storage layer on top of a ``BaseMemory`` page backend.

    It adds a node cache, a reader/writer lock, a freelist of released
    pages and the reading/writing of the tree metadata kept in page 0.
    """

    _fd = None

    def __init__(self, fd: BaseMemory, tree_conf: TreeConf,
                 cache_size: int = 512):
        """Wrap ``fd`` and call its ``open_or_init``.

        :param fd: page backend used for all reads and writes.
        :param tree_conf: configuration used to (de)serialize nodes.
        :param cache_size: maximum size of the LRU node cache; ``0``
            selects ``FakeCache`` instead of an LRU cache.
        """
        self._fd = fd
        self._tree_conf = tree_conf
        self._lock = rwlock.RWLock()
        self._cache = (
            FakeCache() if cache_size == 0
            else cachetools.LRUCache(maxsize=cache_size)
        )
        self._fd.open_or_init()

        # Page allocation continues from the backend's counter.
        self.last_page = self._fd.last_page
        self._freelist_start_page = 0

        # Todo: Remove this, it should only be in Tree
        self._root_node_page = 0

    def get_node(self, page: int):
        """Return the node stored in ``page``, using the cache if possible.

        The cache exists to avoid the cost of deserializing page data into
        a Node object and its entries, which is an expensive operation in
        Python. A node read from the backend is added to the cache.
        """
        cached = self._cache.get(page)
        if cached is not None:
            return cached

        raw = self._fd.read_page(page)
        loaded = Node.from_page_data(self._tree_conf, data=raw, page=page)
        self._cache[loaded.page] = loaded
        return loaded

    def set_node(self, node: Node):
        """Write ``node`` to the backend and store it in the cache."""
        self._fd.write_page_in_tree(node.page, node.dump())
        self._cache[node.page] = node

    def del_node(self, node: Node):
        """Release the page of ``node`` by appending it to the freelist."""
        self._insert_in_freelist(node.page)

    def del_page(self, page: int):
        """Release ``page`` by appending it to the freelist."""
        self._insert_in_freelist(page)

    @property
    def next_available_page(self) -> int:
        """Return a page number that can be written to.

        A page popped from the freelist is preferred; otherwise
        ``last_page`` is incremented and returned. Reading this property
        therefore has side effects.
        """
        recycled = self._pop_from_freelist()
        if recycled is None:
            self.last_page += 1
            return self.last_page
        return recycled

    def _traverse_free_list(self) -> Tuple[Optional[FreelistNode],
                                           Optional[FreelistNode]]:
        """Walk the freelist and return its last two nodes.

        :return: ``(second_to_last, last)``; both are ``None`` when the
            freelist is empty, and ``second_to_last`` is ``None`` when
            the freelist holds a single node.
        """
        if self._freelist_start_page == 0:
            return None, None

        previous = None
        current = self.get_node(self._freelist_start_page)
        while current.next_page is not None:
            previous = current
            current = self.get_node(previous.next_page)
        return previous, current

    def _insert_in_freelist(self, page: int):
        """Insert a page at the end of the freelist."""
        tail = self._traverse_free_list()[1]

        self.set_node(
            FreelistNode(self._tree_conf, page=page, next_page=None)
        )

        if tail is not None:
            tail.next_page = page
            self.set_node(tail)
            return

        # Write in metadata that the freelist got a new starting point
        self._freelist_start_page = page
        self.set_metadata(None, None)

    def _pop_from_freelist(self) -> Optional[int]:
        """Remove the last page from the freelist and return its page.

        Returns ``None`` when the freelist is empty.
        """
        before_tail, tail = self._traverse_free_list()

        if tail is None:
            # Freelist is completely empty, nothing to pop
            return None

        if before_tail is not None:
            before_tail.next_page = None
            self.set_node(before_tail)
        else:
            # Write in metadata that the freelist is empty
            self._freelist_start_page = 0
            self.set_metadata(None, None)

        return tail.page

    # Todo: make metadata as a normal Node
    def get_metadata(self) -> tuple:
        """Read the tree metadata from page 0.

        The page holds, in order: root node page, page size, order, key
        size, value size and freelist start page. The freelist start, the
        tree configuration and the root node page are stored on ``self``.

        :return: ``(root_node_page, tree_conf)``.
        :raises ValueError: if page 0 cannot be read from the backend.
        """
        try:
            raw = self._fd.read_page(0)
        except ReachedEndOfFile:
            raise ValueError('Metadata not set yet')

        # Width in bytes of each consecutive integer field of the page.
        widths = (
            PAGE_REFERENCE_BYTES,  # root node page
            OTHERS_BYTES,          # page size
            OTHERS_BYTES,          # order
            OTHERS_BYTES,          # key size
            OTHERS_BYTES,          # value size
            PAGE_REFERENCE_BYTES,  # freelist start page
        )
        fields = []
        offset = 0
        for width in widths:
            fields.append(
                int.from_bytes(raw[offset:offset + width], ENDIAN)
            )
            offset += width
        (root_page, page_size, order,
         key_size, value_size, freelist_start) = fields

        self._freelist_start_page = freelist_start
        self._tree_conf = TreeConf(
            page_size, order, key_size, value_size,
            self._tree_conf.serializer
        )
        self._root_node_page = root_page
        return root_page, self._tree_conf

    def set_metadata(self, root_node_page: Optional[int],
                     tree_conf: Optional[TreeConf]):
        """Write the tree metadata to page 0.

        :param root_node_page: page of the root node; ``None`` keeps the
            value currently stored on ``self``.
        :param tree_conf: tree configuration; ``None`` keeps the value
            currently stored on ``self``.
        """
        if root_node_page is None:
            root_node_page = self._root_node_page
        if tree_conf is None:
            tree_conf = self._tree_conf

        used = 2 * PAGE_REFERENCE_BYTES + 4 * OTHERS_BYTES
        parts = [
            root_node_page.to_bytes(PAGE_REFERENCE_BYTES, ENDIAN),
            tree_conf.page_size.to_bytes(OTHERS_BYTES, ENDIAN),
            tree_conf.order.to_bytes(OTHERS_BYTES, ENDIAN),
            tree_conf.key_size.to_bytes(OTHERS_BYTES, ENDIAN),
            tree_conf.value_size.to_bytes(OTHERS_BYTES, ENDIAN),
            self._freelist_start_page.to_bytes(PAGE_REFERENCE_BYTES, ENDIAN),
            # Zero padding up to a full page.
            bytes(tree_conf.page_size - used),
        ]
        self._fd.write_page_in_tree(0, b''.join(parts), fsync=True)

        self._tree_conf = tree_conf
        self._root_node_page = root_node_page

    def close(self):
        """Close the underlying backend."""
        self._fd.close()

    def __repr__(self):
        return '<CommonMemory: {}>'.format(str(self._fd))

    @property
    def read_transaction(self):
        """Return a context manager holding the reader lock while active."""

        class ReadTransaction:
            # ``self`` below is the enclosing CommonMemory (closure);
            # ``txn`` is the transaction object itself.
            def __enter__(txn):
                self._lock.reader_lock.acquire()

            def __exit__(txn, exc_type, exc_val, exc_tb):
                self._lock.reader_lock.release()

        return ReadTransaction()

    @property
    def write_transaction(self):
        """Return a context manager holding the writer lock while active."""

        class WriteTransaction:
            # ``self`` below is the enclosing CommonMemory (closure);
            # ``txn`` is the transaction object itself.
            def __enter__(txn):
                self._lock.writer_lock.acquire()

            def __exit__(txn, exc_type, exc_val, exc_tb):
                self._lock.writer_lock.release()

        return WriteTransaction()
14 changes: 14 additions & 0 deletions bplustree/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,17 @@ def _get_value_from_record(self, record: Record) -> bytes:
return record.value

return self._read_from_overflow(record.overflow_page)


class ExtBPlusTree(BPlusTree):
    """B+ tree that works on a memory object supplied by the caller."""

    def __init__(self, mem, tree_conf: TreeConf):
        """Attach the tree to ``mem``.

        Existing metadata is loaded from ``mem``; when ``get_metadata``
        raises ``ValueError`` an empty tree is initialized instead.

        NOTE(review): ``BPlusTree.__init__`` is not called here -
        confirm the parent sets up no other state this class needs.

        :param mem: memory object providing ``get_metadata``.
        :param tree_conf: configuration used until metadata is loaded.
        """
        self._tree_conf = tree_conf
        self._create_partials()
        self._mem = mem
        try:
            stored = self._mem.get_metadata()
        except ValueError:
            self._initialize_empty_tree()
        else:
            root_page, stored_conf = stored
            self._root_node_page = root_page
            self._tree_conf = stored_conf
        self._is_open = True
26 changes: 26 additions & 0 deletions tests/test_extmemory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-


from bplustree.tree import ExtBPlusTree, TreeConf, IntSerializer
from bplustree.memory import CommonMemory, ENDIAN
from bplustree.ext.memory import RAMMemory
from random import random


def test_ext_ram_memory():
    """Insert random keys through the RAM backend and read them back."""
    conf = TreeConf(8096, 100, 16, 32, IntSerializer())
    ram = RAMMemory(conf)
    tree = ExtBPlusTree(CommonMemory(ram, conf), tree_conf=conf)

    for _ in range(500):
        key = int(random() * 100)
        payload_args = (32, ENDIAN)
        try:
            tree.insert(key, key.to_bytes(*payload_args))
        except ValueError:
            # Keys are drawn from only 100 values, so repeats are
            # expected; presumably insert rejects an existing key.
            pass

    # Every stored value must decode back to its own key.
    for key in tree:
        assert key == int.from_bytes(tree[key], ENDIAN)
    assert len(ram.pages) > 0

# Allow running this test module directly as a script.
if __name__ == '__main__':
    test_ext_ram_memory()