Skip to content

Commit

Permalink
Use an LRU cache for CompileUnit DIEs and make the cache size configurable
Browse files Browse the repository at this point in the history
In some larger ELF files, traversing the CUs may cause an out-of-memory (OOM) condition.
Using an LRU cache with a configurable cache size avoids it.

Signed-off-by: anjiahao <[email protected]>
  • Loading branch information
anjiahao1 committed Dec 5, 2024
1 parent 907a9c3 commit f7c6cfb
Showing 1 changed file with 32 additions and 51 deletions.
83 changes: 32 additions & 51 deletions elftools/dwarf/compileunit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Eli Bendersky ([email protected])
# This code is in the public domain
#-------------------------------------------------------------------------------
from bisect import bisect_right
from functools import lru_cache
from .die import DIE
from ..common.utils import dwarf_assert

Expand Down Expand Up @@ -55,16 +55,9 @@ def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
# requested.
self._abbrev_table = None

# A list of DIEs belonging to this CU.
# This list is lazily constructed as DIEs are iterated over.
self._dielist = []
# A list of file offsets, corresponding (by index) to the DIEs
# in `self._dielist`. This list exists separately from
# `self._dielist` to make it binary searchable, enabling the
# DIE population strategy used in `iter_DIE_children`.
# Like `self._dielist`, this list is lazily constructed
# as DIEs are iterated over.
self._diemap = []
# Cache of Top DIE of this CU
self._top_die = None
self.configure_die_cache()

def dwarf_format(self):
""" Get the DWARF format (32 or 64) for this CU
Expand All @@ -86,26 +79,23 @@ def get_top_DIE(self):

# Note that a top DIE always has minimal offset and is therefore
# at the beginning of our lists, so no bisect is required.
if len(self._diemap) > 0:
return self._dielist[0]
if self._top_die is not None:
return self._top_die

top = DIE(
cu=self,
stream=self.dwarfinfo.debug_info_sec.stream,
offset=self.cu_die_offset)

self._dielist.insert(0, top)
self._diemap.insert(0, self.cu_die_offset)

top._translate_indirect_attributes() # Can't translate indirect attributes until the top DIE has been parsed to the end

self._top_die = top
return top

def has_top_DIE(self):
""" Report whether the top DIE of this CU has already been parsed and
cached. This only inspects existing state; it never parses on demand.
"""
# NOTE(review): two consecutive returns follow. This text looks like a
# diff rendered without +/- markers (presumably the first return is the
# removed line and the second its replacement) -- confirm against the
# repository. As written, only the first return is reachable.
return len(self._diemap) > 0
return self._top_die is not None

@property
def size(self):
Expand Down Expand Up @@ -186,6 +176,30 @@ def iter_DIE_children(self, die):
pass

cur_offset = child._terminator.offset + child._terminator.size
def configure_die_cache(self, cachesize=None):
    """ Install a fresh DIE cache on this CU as `self._get_cached_DIE`.

        cachesize:
            Passed to functools.lru_cache as `maxsize`. None (the
            default) leaves the cache unbounded; an int limits how many
            parsed DIEs are kept, with the least recently used ones
            evicted first.

        Each call builds a new, empty cache that replaces the one
        installed by any earlier call.

        Raises TypeError if cachesize is neither None nor an int.
    """
    # lru_cache() treats a callable first argument as the function to
    # decorate, so a wrong argument type could be accepted silently.
    # Reject anything that is not a size up front.
    if cachesize is not None and not isinstance(cachesize, int):
        raise TypeError(
            'cachesize must be an int or None, got %r' % (cachesize,))

    @lru_cache(maxsize=cachesize)
    def _get_cached_DIE(offset):
        """ Given a DIE offset, look it up in the cache. If not present,
            parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_info section to
                retrieve.

            The stream reference is copied from the top DIE, which is
            obtained through get_top_DIE().

            See also get_DIE_from_refaddr(self, refaddr).
        """
        # The stream is the same for all DIEs in this CU, so take it
        # from the top DIE.
        top_die_stream = self.get_top_DIE().stream
        return DIE(cu=self, stream=top_die_stream, offset=offset)

    # Bound per instance: the closure captures `self`, so every CU gets
    # its own independent cache.
    self._get_cached_DIE = _get_cached_DIE

#------ PRIVATE ------#

Expand All @@ -209,37 +223,4 @@ def _iter_DIE_subtree(self, die):
yield d
yield die._terminator

def _get_cached_DIE(self, offset):
""" Given a DIE offset, look it up in the cache. If not present,
parse the DIE and insert it into the cache.
offset:
The offset of the DIE in the debug_info section to retrieve.
The stream reference is copied from the top DIE. The top DIE will
also be parsed and cached if needed.
Returns the cached or newly parsed DIE.
See also get_DIE_from_refaddr(self, refaddr).
"""
# NOTE(review): this bisect-based version relies on `self._dielist` and
# `self._diemap`; it appears to be the implementation this commit
# removes -- confirm against the repository.
# The top DIE must be in the cache if any DIE is in the cache.
# The stream is the same for all DIEs in this CU, so populate
# the top DIE and obtain a reference to its stream.
top_die_stream = self.get_top_DIE().stream

# `offset` is the offset in the stream of the DIE we want to return.
# The map is maintained as a parallel array to the list. We call
# bisect each time to ensure new DIEs are inserted in the correct
# order within both `self._dielist` and `self._diemap`.
i = bisect_right(self._diemap, offset)

# Note that `self._diemap` cannot be empty because the top DIE
# was inserted by the call to .get_top_DIE(). Also it has the minimal
# offset, so the bisect_right insert point will always be at least 1.
if offset == self._diemap[i - 1]:
# Exact match: this offset was parsed before, reuse the cached DIE.
die = self._dielist[i - 1]
else:
# Not cached yet: parse it and insert at the same index in both
# parallel lists so they stay sorted by offset.
die = DIE(cu=self, stream=top_die_stream, offset=offset)
self._dielist.insert(i, die)
self._diemap.insert(i, offset)

return die

0 comments on commit f7c6cfb

Please sign in to comment.