# -*- coding: utf-8 -*-
# ============================================================================
# bplustree/ext/__init__.py  (new file)
# ============================================================================
# Package marker only; the file contains nothing but the coding cookie.


# ============================================================================
# bplustree/ext/memory.py  (new file)
# ============================================================================
from io import BytesIO
from typing import List, Optional, Tuple, Union

from bplustree.memory import BaseMemory, ReachedEndOfFile


class RAMMemory(BaseMemory):
    """Page store that keeps every page in process memory.

    Pages are held in ``self.pages``, a list indexed by page number. Slots
    for pages that were never written hold ``None``.
    """

    # Value CommonMemory copies right after ``open_or_init()``. Kept at 1 so
    # the first page handed out by ``next_available_page`` is unchanged.
    last_page = 1
    tree_conf = None

    def __init__(self, tree_conf, data=''):
        """Create an empty in-memory store.

        :param tree_conf: tree configuration, stored on the instance.
        :param data: accepted for backward compatibility; currently unused.
        """
        self.tree_conf = tree_conf
        # Per-instance storage: a class-level list would be shared by every
        # RAMMemory instance in the process.
        self.pages = []  # type: List[Optional[BytesIO]]

    def close(self):
        """Do nothing: there is no external resource to release.

        The stored pages are intentionally left in place.
        """

    def write_page_in_tree(self, page: int, data: Union[bytes, bytearray],
                           fsync: bool = True):
        """Store ``data`` as the content of page number ``page``.

        :param page: zero-based page number.
        :param data: raw page content.
        :param fsync: ignored; present to match the BaseMemory interface.
        :return: the ``BytesIO`` buffer holding the page.
        :raises ValueError: if ``page`` is negative.
        """
        if page < 0:
            raise ValueError('Page number must not be negative')

        # Grow the list so that the page lands at its own index; appending
        # blindly would store it under the wrong page number.
        missing = page + 1 - len(self.pages)
        if missing > 0:
            self.pages.extend([None] * missing)

        buffer = self.pages[page]
        if buffer is None:
            buffer = BytesIO()
            self.pages[page] = buffer

        # Rewind and truncate so a shorter payload leaves no stale tail.
        buffer.seek(0)
        buffer.truncate()
        buffer.write(data)

        # Track the highest page in use so a new CommonMemory wrapped around
        # this store does not hand out pages that already hold data.
        if page > self.last_page:
            self.last_page = page
        return buffer

    def read_page(self, page: int) -> bytes:
        """Return the content of page number ``page``.

        :raises ReachedEndOfFile: if the page was never written.
        """
        if 0 <= page < len(self.pages):
            buffer = self.pages[page]
            if buffer is not None:
                return buffer.getvalue()
        raise ReachedEndOfFile('Page {} has not been written'.format(page))

    def open_or_init(self):
        """Do nothing: the store is ready as soon as it is constructed."""


# ============================================================================
# bplustree/memory.py  (appended after the existing last class, line 492+)
# ============================================================================
# The names used below (Union, Tuple, Optional, TreeConf, Node, FreelistNode,
# FakeCache, ReachedEndOfFile, rwlock, cachetools, PAGE_REFERENCE_BYTES,
# OTHERS_BYTES, ENDIAN) are not defined in this excerpt; they come from the
# part of bplustree/memory.py that the patch leaves untouched.


class BaseMemory(object):
    """Interface of a page-oriented storage backend used by CommonMemory."""

    # Highest page number already in use; read once by CommonMemory after
    # ``open_or_init()``.
    last_page = 0

    def open_or_init(self):
        """Prepare the backend for use."""
        raise NotImplementedError

    def read_page(self, page: int) -> bytes:
        """Return the raw content of page number ``page``."""
        raise NotImplementedError

    def write_page_in_tree(self, page: int, data: Union[bytes, bytearray],
                           fsync: bool = True):
        """Store ``data`` as the content of page number ``page``."""
        raise NotImplementedError

    def close(self):
        """Release whatever the backend holds."""
        raise NotImplementedError


class CommonMemory(object):
    """Node cache, freelist and metadata handling on top of a BaseMemory."""

    _fd = None

    def __init__(self, fd: BaseMemory, tree_conf: TreeConf,
                 cache_size: int = 512):
        """Wrap the backend ``fd``.

        :param fd: storage backend; ``open_or_init()`` is called on it here.
        :param tree_conf: tree configuration used to (de)serialize nodes.
        :param cache_size: maximum number of cached nodes; 0 disables caching.
        """
        self._fd = fd
        self._tree_conf = tree_conf
        self._lock = rwlock.RWLock()
        if cache_size == 0:
            self._cache = FakeCache()
        else:
            self._cache = cachetools.LRUCache(maxsize=cache_size)
        # Get the next available page
        self._fd.open_or_init()

        self.last_page = self._fd.last_page
        self._freelist_start_page = 0

        # Todo: Remove this, it should only be in Tree
        self._root_node_page = 0

    def get_node(self, page: int):
        """Get a node from storage.

        The cache is not there to prevent hitting the disk, the OS is already
        very good at it. It is there to avoid paying the price of deserializing
        the data to create the Node object and its entry. This is a very
        expensive operation in Python.

        Since we have at most a single writer we can write to cache on
        `set_node` if we invalidate the cache when a transaction is rolled
        back.
        """
        node = self._cache.get(page)
        if node is not None:
            return node

        data = self._fd.read_page(page)
        node = Node.from_page_data(self._tree_conf, data=data, page=page)
        self._cache[node.page] = node
        return node

    def set_node(self, node: Node):
        """Write ``node`` to the backend and refresh its cache entry."""
        self._fd.write_page_in_tree(node.page, node.dump())
        self._cache[node.page] = node

    def del_node(self, node: Node):
        """Put the page of ``node`` on the freelist."""
        self._insert_in_freelist(node.page)

    def del_page(self, page: int):
        """Put ``page`` on the freelist."""
        self._insert_in_freelist(page)

    @property
    def next_available_page(self) -> int:
        """Return a page number that can be written to.

        NOTE: reading this property has side effects. It pops a page from
        the freelist when one is available, otherwise it increments
        ``last_page``.
        """
        last_freelist_page = self._pop_from_freelist()
        if last_freelist_page is not None:
            return last_freelist_page

        self.last_page += 1
        return self.last_page

    def _traverse_free_list(self) -> Tuple[Optional[FreelistNode],
                                           Optional[FreelistNode]]:
        """Return the (second-to-last, last) nodes of the freelist.

        Both are ``None`` when the freelist is empty; only the first is
        ``None`` when the freelist holds a single node.
        """
        if self._freelist_start_page == 0:
            return None, None

        second_to_last_node = None
        last_node = self.get_node(self._freelist_start_page)

        while last_node.next_page is not None:
            second_to_last_node = last_node
            last_node = self.get_node(second_to_last_node.next_page)

        return second_to_last_node, last_node

    def _insert_in_freelist(self, page: int):
        """Insert a page at the end of the freelist."""
        _, last_node = self._traverse_free_list()

        self.set_node(FreelistNode(self._tree_conf, page=page, next_page=None))

        if last_node is None:
            # Write in metadata that the freelist got a new starting point
            self._freelist_start_page = page
            self.set_metadata(None, None)
        else:
            last_node.next_page = page
            self.set_node(last_node)

    def _pop_from_freelist(self) -> Optional[int]:
        """Remove the last page from the freelist and return its page."""
        second_to_last_node, last_node = self._traverse_free_list()

        if last_node is None:
            # Freelist is completely empty, nothing to pop
            return None

        if second_to_last_node is None:
            # Write in metadata that the freelist is empty
            self._freelist_start_page = 0
            self.set_metadata(None, None)
        else:
            second_to_last_node.next_page = None
            self.set_node(second_to_last_node)

        return last_node.page

    # Todo: make metadata as a normal Node
    def get_metadata(self) -> tuple:
        """Read page 0 and return ``(root_node_page, tree_conf)``.

        Also updates ``_freelist_start_page``, ``_tree_conf`` and
        ``_root_node_page`` on this object from the stored values.

        :raises ValueError: if the backend has no page 0 yet.
        """
        try:
            data = self._fd.read_page(0)
        except ReachedEndOfFile:
            raise ValueError('Metadata not set yet')
        end_root_node_page = PAGE_REFERENCE_BYTES
        root_node_page = int.from_bytes(
            data[0:end_root_node_page], ENDIAN
        )
        end_page_size = end_root_node_page + OTHERS_BYTES
        page_size = int.from_bytes(
            data[end_root_node_page:end_page_size], ENDIAN
        )
        end_order = end_page_size + OTHERS_BYTES
        order = int.from_bytes(
            data[end_page_size:end_order], ENDIAN
        )
        end_key_size = end_order + OTHERS_BYTES
        key_size = int.from_bytes(
            data[end_order:end_key_size], ENDIAN
        )
        end_value_size = end_key_size + OTHERS_BYTES
        value_size = int.from_bytes(
            data[end_key_size:end_value_size], ENDIAN
        )
        end_freelist_start_page = end_value_size + PAGE_REFERENCE_BYTES
        self._freelist_start_page = int.from_bytes(
            data[end_value_size:end_freelist_start_page], ENDIAN
        )
        # The serializer is not stored in the page; reuse the current one.
        self._tree_conf = TreeConf(
            page_size, order, key_size, value_size, self._tree_conf.serializer
        )
        self._root_node_page = root_node_page
        return root_node_page, self._tree_conf

    def set_metadata(self, root_node_page: Optional[int],
                     tree_conf: Optional[TreeConf]):
        """Write the metadata page (page 0).

        :param root_node_page: new root page, or ``None`` to keep the
            current one.
        :param tree_conf: new configuration, or ``None`` to keep the
            current one.
        """
        if root_node_page is None:
            root_node_page = self._root_node_page

        if tree_conf is None:
            tree_conf = self._tree_conf

        # Bytes taken by the fields below; the rest of the page is zeros.
        length = 2 * PAGE_REFERENCE_BYTES + 4 * OTHERS_BYTES
        data = (
            root_node_page.to_bytes(PAGE_REFERENCE_BYTES, ENDIAN) +
            tree_conf.page_size.to_bytes(OTHERS_BYTES, ENDIAN) +
            tree_conf.order.to_bytes(OTHERS_BYTES, ENDIAN) +
            tree_conf.key_size.to_bytes(OTHERS_BYTES, ENDIAN) +
            tree_conf.value_size.to_bytes(OTHERS_BYTES, ENDIAN) +
            self._freelist_start_page.to_bytes(PAGE_REFERENCE_BYTES, ENDIAN) +
            bytes(tree_conf.page_size - length)
        )
        self._fd.write_page_in_tree(0, data, fsync=True)

        self._tree_conf = tree_conf
        self._root_node_page = root_node_page

    def close(self):
        """Close the underlying backend."""
        self._fd.close()

    def __repr__(self):
        # The template was an empty string, which made repr() always return
        # '' regardless of the backend.
        return '<CommonMemory: {}>'.format(str(self._fd))

    @property
    def read_transaction(self):
        """Context manager holding the reader side of the lock."""

        class ReadTransaction:
            # ``self2`` is the context manager; ``self`` is the enclosing
            # CommonMemory captured by closure.
            def __enter__(self2):
                self._lock.reader_lock.acquire()

            def __exit__(self2, exc_type, exc_val, exc_tb):
                self._lock.reader_lock.release()

        return ReadTransaction()

    @property
    def write_transaction(self):
        """Context manager holding the writer side of the lock."""

        class WriteTransaction:
            # ``self2`` is the context manager; ``self`` is the enclosing
            # CommonMemory captured by closure.
            def __enter__(self2):
                self._lock.writer_lock.acquire()

            def __exit__(self2, exc_type, exc_val, exc_tb):
                self._lock.writer_lock.release()

        return WriteTransaction()


# ============================================================================
# bplustree/tree.py  (appended after the existing last class, line 443+)
# ============================================================================
# BPlusTree, TreeConf and Record come from the untouched part of tree.py.


class ExtBPlusTree(BPlusTree):
    """BPlusTree working on a caller-supplied memory object."""

    def __init__(self, mem, tree_conf: TreeConf):
        """Attach the tree to ``mem``.

        ``BPlusTree.__init__`` is not called.
        NOTE(review): presumably because the base constructor builds its own
        storage -- confirm against BPlusTree.

        :param mem: object providing the CommonMemory interface.
        :param tree_conf: configuration used when no metadata is stored yet.
        """
        self._tree_conf = tree_conf
        self._create_partials()
        self._mem = mem
        try:
            metadata = self._mem.get_metadata()
        except ValueError:
            # No metadata page yet: start from an empty tree.
            self._initialize_empty_tree()
        else:
            self._root_node_page, self._tree_conf = metadata
        self._is_open = True


# ============================================================================
# tests/test_extmemory.py  (new file)
# ============================================================================
from bplustree.tree import ExtBPlusTree, TreeConf, IntSerializer
from bplustree.memory import CommonMemory, ENDIAN
from bplustree.ext.memory import RAMMemory
from random import random, seed


def test_ext_ram_memory():
    """Insert random keys through RAMMemory and read every one back."""
    seed(0)  # fixed seed so a failure can be reproduced
    tree_conf = TreeConf(8096, 100, 16, 32, IntSerializer())
    test = RAMMemory(tree_conf)
    tree = ExtBPlusTree(CommonMemory(test, tree_conf), tree_conf=tree_conf)

    inserted = set()
    for i in range(500):
        record = int(random() * 100)
        try:
            tree.insert(record, record.to_bytes(32, ENDIAN))
        except ValueError:
            # Presumably the duplicate-key error; anything else should fail.
            assert record in inserted
        else:
            inserted.add(record)

    for record in tree:
        assert record == int.from_bytes(tree[record], ENDIAN)
    assert set(tree) == inserted

    # The backing store itself must hold readable data, not only the cache.
    assert len(test.pages) > 0
    assert len(test.read_page(0)) == tree_conf.page_size


if __name__ == '__main__':
    test_ext_ram_memory()