Skip to content

Commit

Permalink
Improve support searching indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
Schamper committed Jan 28, 2025
1 parent 8ce6797 commit 4d1b6cb
Show file tree
Hide file tree
Showing 11 changed files with 543 additions and 146 deletions.
156 changes: 156 additions & 0 deletions dissect/esedb/btree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from dissect.esedb.exceptions import KeyNotFoundError, NoNeighbourPageError

if TYPE_CHECKING:
from dissect.esedb.esedb import EseDB
from dissect.esedb.page import Node, Page

Check warning on line 9 in dissect/esedb/btree.py

View check run for this annotation

Codecov / codecov/patch

dissect/esedb/btree.py#L8-L9

Added lines #L8 - L9 were not covered by tests


class BTree:
    """A simple implementation for searching the ESE B+Trees.

    This is a stateful interactive class that moves an internal cursor to a position within the BTree.

    Args:
        esedb: An instance of :class:`~dissect.esedb.esedb.EseDB`.
        root: The root page, or the number of the root page, to open a BTree on.
    """

    def __init__(self, esedb: EseDB, root: int | Page):
        self.esedb = esedb

        if isinstance(root, int):
            page_num = root
            root = esedb.page(page_num)
        else:
            page_num = root.num

        self.root = root

        # Cursor state: the current page, the number of that page and the node index within it
        self._page = root
        self._page_num = page_num
        self._node_num = 0

    def reset(self) -> None:
        """Reset the internal state to the root of the BTree."""
        self._page = self.root
        self._page_num = self._page.num
        self._node_num = 0

    def node(self) -> Node:
        """Return the node the BTree is currently on."""
        return self._page.node(self._node_num)

    def next(self) -> Node:
        """Move the BTree to the next node and return it.

        Can move the BTree to the next page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the last node of a page that has no next page.
        """
        if self._node_num + 1 >= self._page.node_count:
            # Already on the last node of this page, continue on the first node of the next page
            self.next_page()
        else:
            self._node_num += 1

        return self.node()

    def next_page(self) -> None:
        """Move the BTree to the first node of the next page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no next page.
        """
        next_page_num = self._page.next_page
        if not next_page_num:
            raise NoNeighbourPageError(f"{self._page} has no next page")

        self._page = self.esedb.page(next_page_num)
        # Keep the page number in sync with the page, just like reset() and search() do
        self._page_num = self._page.num
        self._node_num = 0

    def prev(self) -> Node:
        """Move the BTree to the previous node and return it.

        Can move the BTree to the previous page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the first node of a page that has no previous page.
        """
        if self._node_num <= 0:
            # Already on the first node of this page, continue on the last node of the previous page
            self.prev_page()
        else:
            self._node_num -= 1

        return self.node()

    def prev_page(self) -> None:
        """Move the BTree to the last node of the previous page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no previous page.
        """
        prev_page_num = self._page.previous_page
        if not prev_page_num:
            raise NoNeighbourPageError(f"{self._page} has no previous page")

        self._page = self.esedb.page(prev_page_num)
        # Keep the page number in sync with the page, just like reset() and search() do
        self._page_num = self._page.num
        self._node_num = self._page.node_count - 1

    def search(self, key: bytes, exact: bool = True) -> Node:
        """Search the tree for the given key.

        The descent starts at the page the cursor is currently on, so call :meth:`reset` first to search
        the whole tree from the root.

        Moves the BTree to the matching node. If there is no exact match, the cursor ends up on the first
        leaf node whose key is not smaller than the requested key, or on the last node of the leaf page if
        all of its keys are smaller. The cursor is moved even when :class:`KeyNotFoundError` is raised.

        Args:
            key: The key to search for.
            exact: Whether to only return successfully on an exact match.

        Returns:
            The node the cursor ended up on.

        Raises:
            KeyNotFoundError: If an ``exact`` match was requested but not found.
        """
        page = self._page
        while True:
            node = find_node(page, key)

            if page.is_branch:
                # Branch nodes only point to a child page, keep descending until we hit a leaf page
                page = self.esedb.page(node.child)
            else:
                self._page = page
                self._page_num = page.num
                self._node_num = node.num
                break

        if exact and key != node.key:
            raise KeyNotFoundError(f"Can't find key: {key}")

        return self.node()


def find_node(page: Page, key: bytes) -> Node:
    """Search a page for a node matching ``key`` using a binary search.

    Args:
        page: The page to search.
        key: The key to search.

    Returns:
        The node with a key equal to ``key`` if the page is a leaf page and such a node exists. On a branch
        page an exact match returns the node *after* the matching one (capped at the last node of the page).
        Without an exact match, the first node with a key larger than ``key`` is returned, or the last node of
        the page if there is no such node. The key of the last node itself is never compared.
    """
    first_node_idx = 0
    last_node_idx = page.node_count - 1

    while first_node_idx < last_node_idx:
        node_idx = (first_node_idx + last_node_idx) // 2
        node = page.node(node_idx)

        # It turns out that the way BTree keys are compared matches 1:1 with how Python compares bytes
        # First compare data, then length
        if key < node.key:
            # Keep this node as a candidate, it may be the first node with a larger key
            last_node_idx = node_idx
        elif key == node.key:
            if page.is_branch:
                # If there's an exact match on a key on a branch page, the actual leaf nodes are in the next branch
                # Page keys for branch pages appear to be non-inclusive upper bounds
                node_idx = min(node_idx + 1, page.node_count - 1)
                node = page.node(node_idx)

            return node
        else:
            first_node_idx = node_idx + 1

    # Either we narrowed the search down to the first node with a larger key, or we're at the last node
    return page.node(first_node_idx)
30 changes: 30 additions & 0 deletions dissect/esedb/c_esedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,34 @@
DotNetGuid = 0x00040000, // index over GUID column according to .Net GUID sort order
ImmutableStructure = 0x00080000, // Do not write to the input structures during a JetCreateIndexN call.
};
flag IDBFLAG : uint16 {
Unique = 0x0001, // Duplicate keys not allowed
AllowAllNulls = 0x0002, // Make entries for NULL keys (all segments are null)
AllowFirstNull = 0x0004, // First index column NULL allowed in index
AllowSomeNulls = 0x0008, // Make entries for keys with some null segments
NoNullSeg = 0x0010, // Don't allow a NULL key segment
Primary = 0x0020, // Index is the primary index
LocaleSet = 0x0040, // Index locale information (locale name) is set (JET_bitIndexUnicode was specified).
Multivalued = 0x0080, // Has a multivalued segment
TemplateIndex = 0x0100, // Index of a template table
DerivedIndex = 0x0200, // Index derived from template table
// Note that this flag is persisted, but
// never used in an in-memory IDB, because
// we use the template index IDB instead.
LocalizedText = 0x0400, // Has a unicode text column? (code page is 1200)
SortNullsHigh = 0x0800, // NULL sorts after data
// Jan 2012: MSU is being removed. fidbUnicodeFixupOn should no longer be referenced.
UnicodeFixupOn_Deprecated = 0x1000, // Track entries with undefined Unicode codepoints
CrossProduct = 0x2000, // all combinations of multi-valued columns are indexed
DisallowTruncation = 0x4000, // fail update rather than allow key truncation
NestedTable = 0x8000, // combinations of multi-valued columns of same itagSequence are indexed
};
flag IDXFLAG : uint16 {
ExtendedColumns = 0x0001, // IDXSEGs are comprised of JET_COLUMNIDs, not FIDs
DotNetGuid = 0x0002, // GUIDs sort according to .Net rules
};
""" # noqa E501

c_esedb = cstruct().load(esedb_def)
Expand All @@ -444,6 +472,8 @@
TAGFLD_HEADER = c_esedb.TAGFLD_HEADER
CODEPAGE = c_esedb.CODEPAGE
COMPRESSION_SCHEME = c_esedb.COMPRESSION_SCHEME
IDBFLAG = c_esedb.IDBFLAG
IDXFLAG = c_esedb.IDXFLAG

CODEPAGE_MAP = {
CODEPAGE.UNICODE: "utf-16-le",
Expand Down
Loading

0 comments on commit 4d1b6cb

Please sign in to comment.